pax_global_header00006660000000000000000000000064143464764160014531gustar00rootroot0000000000000052 comment=d303b4bbefe08b47296ae6c99223bc2303dae757 hipBLAS-rocm-5.5.1/000077500000000000000000000000001434647641600137215ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/.clang-format000066400000000000000000000065421434647641600163030ustar00rootroot00000000000000# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: WebKit TabWidth: 4 IndentWidth: 4 UseTab: Never ColumnLimit: 100 # Other languages JavaScript, Proto --- Language: Cpp # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: Cpp11 AccessModifierOffset: -4 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: false AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: { AfterCaseLabel: 'true' AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' AfterFunction : 'true' AfterNamespace : 'true' AfterStruct : 'true' AfterUnion : 'true' BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' # AfterExternBlock : 'true' } #BreakAfterJavaFieldAnnotations: true #BreakBeforeInheritanceComma: false #BreakBeforeBinaryOperators: None #BreakBeforeTernaryOperators: true #BreakConstructorInitializersBeforeComma: true #BreakStringLiterals: true CommentPragmas: '^ IWYU pragma:' #CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true SpaceBeforeCpp11BracedList: false DerivePointerAlignment: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IndentCaseLabels: false IndentPPDirectives: None #FixNamespaceComments: true IndentWrappedFunctionNames: true KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' #JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 NamespaceIndentation: All ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: Never SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false #SpaceAfterTemplateKeyword: true #SpaceBeforeInheritanceColon: true 
#SortUsingDeclarations: true SortIncludes: true # Comments are for developers, they should arrange them ReflowComments: false #IncludeBlocks: Preserve --- hipBLAS-rocm-5.5.1/.githooks/000077500000000000000000000000001434647641600156265ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/.githooks/install000077500000000000000000000002221434647641600172160ustar00rootroot00000000000000#!/usr/bin/env bash cd $(git rev-parse --git-dir) cd hooks echo "Installing hooks..." ln -s ../../.githooks/pre-commit pre-commit echo "Done!" hipBLAS-rocm-5.5.1/.githooks/pre-commit000077500000000000000000000046321434647641600176350ustar00rootroot00000000000000#!/bin/bash # # This pre-commit hook checks if any versions of clang-format # are installed, and if so, uses the installed version to format # the staged changes. export PATH=$PATH:/opt/rocm/llvm/bin:/usr/bin:/bin # Redirect stdout to stderr. exec >&2 # Do everything from top - level cd $(git rev-parse --show-toplevel) if git rev-parse --verify HEAD >/dev/null 2>&1; then against=HEAD else # Initial commit: diff against an empty tree object against=8c64666c40d9eff214389fbcc1648331fb17a029 fi if [[ "$1" == "--reformat" ]]; then files=$(git ls-files --exclude-standard) else files=$(git diff-index --cached --name-only $against) fi [[ -z "$files" ]] && exit # Change the copyright date at the top of any text files for file in $files; do [[ -L $file ]] && continue echo "Processing copyright dates in $file" if [[ -e $file ]]; then /usr/bin/perl -pi -e 'INIT { exit 1 if !-f $ARGV[0] || -B $ARGV[0]; $year = (localtime)[5] + 1900 } s/^([*\/#\/"*[:space:]]*)Copyright\s+(?:\(C\)\s*)?(\d+)(?:\s*-\s*\d+)?\s(Advanced\s*Micro\s*Devices)/qq($1Copyright (C) $2@{[$year != $2 ? "-$year" : ""]} $3)/ie if $. < 10' "$file" && git add -u "$file" fi done # do the formatting for file in $files; do [[ -L $file ]] && continue if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.in$|\.txt$|\.yaml$|\.yml$|\.sh$|\.py$|\.pl$|\.cmake$|\.md$|\.rst$|\.groovy$|\.ini$|\.awk$|\.csv$'; then echo "Processing line endings in $file" sed -i -e 's/[[:space:]]*$//' "$file" # Remove whitespace at end of lines sed -i -e '$a\' "$file" # Add missing newline to end of file echo "Converting non-ASCII characters to ASCII equivalents in $file" # Convert UTF8 non-ASCII to ASCII temp=$(mktemp) [[ -w $temp ]] || exit iconv -s -f utf-8 -t ascii//TRANSLIT "$file" > "$temp" || exit chmod --reference="$file" "$temp" || exit mv -f "$temp" "$file" || exit git add -u "$file" fi done # if clang-format exists, run it on C/C++ files if command -v clang-format >/dev/null; then for file in $files; do [[ -L $file ]] && continue if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.h\.in$|\.hpp\.in$|\.cpp\.in$'; then echo "clang-format $file" clang-format -i -style=file "$file" git add -u "$file" fi done fi hipBLAS-rocm-5.5.1/.github/000077500000000000000000000000001434647641600152615ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/.github/CODEOWNERS000066400000000000000000000001161434647641600166520ustar00rootroot00000000000000* @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd hipBLAS-rocm-5.5.1/.github/CONTRIBUTING.md000066400000000000000000000014101434647641600175060ustar00rootroot00000000000000## How to contribute Our code contriubtion guidelines closely follows the model of [GitHub pull-requests](https://help.github.com/articles/using-pull-requests/). 
The hipBLAS repository follows a workflow which dictates a /master branch where releases are cut, and a /develop branch which serves as an integration branch for new code. ## Pull-request guidelines * target the **develop** branch for integration * ensure code builds successfully. * do not break existing test cases * new functionality will only be merged with new unit tests * new unit tests should integrate within the existing googletest framework. * tests must have good code coverage * code must also have benchmark tests, and performance must approach the compute bound limit or memory bound limit. hipBLAS-rocm-5.5.1/.github/ISSUE_TEMPLATE.md000066400000000000000000000004531434647641600177700ustar00rootroot00000000000000### What is the expected behavior - ### What actually happens - ### How to reproduce - ### Environment | Hardware | description | |-----|-----| | GPU | device string | | CPU | device string | | Software | version | |-----|-----| | ROCK | v0.0 | | ROCR | v0.0 | | HCC | v0.0 | | Library | v0.0 | hipBLAS-rocm-5.5.1/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000000621434647641600210600ustar00rootroot00000000000000resolves #___ Summary of proposed changes: - - - hipBLAS-rocm-5.5.1/.github/workflows/000077500000000000000000000000001434647641600173165ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/.github/workflows/docs.yaml000066400000000000000000000045551434647641600211430ustar00rootroot00000000000000name: Upload to the upload server # Controls when the workflow will run on: push: branches: [develop, master] tags: - rocm-5.* release: types: [published] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: # This workflow contains a single job called "build" build: # The type of runner that the job will run on runs-on: ubuntu-latest # Steps represent a sequence of tasks that will be executed as part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - name: getting branch name shell: bash run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" id: branch_name - name: getting tag name shell: bash run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})" id: tag_name - name: zipping files run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . 
-x '*.git*' '*.idea*' - name: echo-step run: echo "${{ github.event.release.target_commitish }}" - name: uploading archive to prod if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.PROD_UPLOAD_URL }}' args: '-o ConnectTimeout=5' - name: uploading archive to staging if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.STG_UPLOAD_URL }}' args: '-o ConnectTimeout=5' hipBLAS-rocm-5.5.1/.gitignore000066400000000000000000000005361434647641600157150ustar00rootroot00000000000000# Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod *.smod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app # Editors .vscode # build-in-source directory build* # emacs temporary/backup files .\#* \#*\# *~ hipBLAS-rocm-5.5.1/.jenkins/000077500000000000000000000000001434647641600154405ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/.jenkins/common.groovy000066400000000000000000000045131434647641600202020ustar00rootroot00000000000000// This file is for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. def runCompileCommand(platform, project, jobName, boolean sameOrg=false) { project.paths.construct_build_prefix() def getDependenciesCommand = "" if (project.installLibraryDependenciesFromCI) { project.libraryDependencies.each { libraryName -> getDependenciesCommand += auxiliary.getLibrary(libraryName, platform.jenkinsLabel, 'develop', sameOrg) } } String centos = platform.jenkinsLabel.contains('centos7') ? 'source scl_source enable devtoolset-7' : ':' def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} ${getDependenciesCommand} ${centos} LD_LIBRARY_PATH=/opt/rocm/lib ${project.paths.build_command} """ platform.runCommand(this, command) } def runTestCommand (platform, project) { String sudo = auxiliary.sudo(platform.jenkinsLabel) def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix}/build/release/clients/staging ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas-test --gtest_output=xml --gtest_color=yes """ platform.runCommand(this, command) junit "${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" } def runPackageCommand(platform, project, jobName, label='') { def command label = label != '' ? '-' + label.toLowerCase() : '' String ext = platform.jenkinsLabel.contains('ubuntu') ? "deb" : "rpm" String dir = jobName.contains('Debug') ? "debug" : "release" command = """ set -x cd ${project.paths.project_build_prefix}/build/${dir} make package mkdir -p package if [ ! 
-z "$label" ] then for f in hipblas*.$ext do mv "\$f" "hipblas${label}-\${f#*-}" done fi mv *.${ext} package/ """ platform.runCommand(this, command) platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/build/${dir}/package/*.${ext}""") } return this hipBLAS-rocm-5.5.1/.jenkins/multicompiler.groovy000066400000000000000000000062221434647641600215760ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipBLAS', 'MultiCompiler') //customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocBLAS-internal', 'rocSOLVER'] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++' String hipClangBuildCommand = './install.sh -c --compiler=/opt/rocm/hip/bin/hipcc' String clangBuildCommand = './install.sh -c --compiler=clang++' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') setupCI(urlJobName, jobNameList, hipClangBuildCommand, runCI, 'hip-clang') } hipBLAS-rocm-5.5.1/.jenkins/precheckin-cuda.groovy000066400000000000000000000050421434647641600217350ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. 
// If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipBLAS', 'PreCheckin-CUDA') //customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = [] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI(['ubuntu20-cuda11':['anycuda']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = [] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = [:] propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++ --cuda' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') } hipBLAS-rocm-5.5.1/.jenkins/precheckin.groovy000066400000000000000000000056441434647641600210330ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipBLAS', 'PreCheckin') //customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocBLAS-internal', 'rocSOLVER'] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') } hipBLAS-rocm-5.5.1/.jenkins/staticanalysis.groovy000066400000000000000000000017361434647641600217510ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('hipBLAS', 'Static Analysis') // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = true boolean staticAnalysis = true buildProject(prj, formatCheck, nodes.dockerArray, null, null, null, staticAnalysis) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 2')])])) stage(urlJobName) { runCI([ubuntu20:['any']], urlJobName) } } hipBLAS-rocm-5.5.1/.jenkins/staticlibrary.groovy000066400000000000000000000046241434647641600215710ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('hipBLAS', 'StaticLibrary') prj.paths.build_command = './install.sh -cd --static -p /opt/rocm/lib/cmake' prj.libraryDependencies = ['rocBLAS-internal', 'rocSOLVER'] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = true def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName, true) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx900']], urlJobName) } } } hipBLAS-rocm-5.5.1/.readthedocs.yaml000066400000000000000000000004171434647641600171520ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/source/conf.py formats: all python: version: "3.7" install: - requirements: docs/source/requirements.txt hipBLAS-rocm-5.5.1/CHANGELOG.md000066400000000000000000000163501434647641600155370ustar00rootroot00000000000000# Change Log for hipBLAS ## hipBLAS 0.54.0 for ROCm 5.5.0 ### Added - added option to opt-in to use __half for hipblasHalf type in the API for c++ users who define HIPBLAS_USE_HIP_HALF - added scripts to plot performance for multiple functions - data driven hipblas-bench and hipblas-test execution via external yaml format data files - client smoke test added for quick validation using command hipblas-test --yaml hipblas_smoke.yaml ### Fixed - fixed datatype conversion functions to support more rocBLAS/cuBLAS datatypes - fixed geqrf to return successfully when nullptrs are passed in with n == 0 || m == 0 - fixed getrs to return successfully when given nullptrs with corresponding size = 0 - fixed getrs to give info = -1 when transpose is not an expected type - fixed gels to return successfully when given nullptrs with corresponding size = 0 - fixed gels to give info = -1 when transpose is not in ('N', 'T') for real cases or not in ('N', 'C') for complex cases ### Changed - changed reference code for Windows to OpenBLAS - hipblas client executables all now begin with hipblas- prefix ## hipBLAS 0.53.0 for ROCm 5.4.0 ### Added - Allow for selection of int8 datatype - Added support for hipblasXgels and hipblasXgelsStridedBatched operations (with s,d,c,z precisions), only supported with rocBLAS backend - Added support for hipblasXgelsBatched operations (with s,d,c,z precisions) ## hipBLAS 0.52.0 for ROCm 5.3.0 ### Added - Added --cudapath option to install.sh to allow user to specify which cuda build they would like to use. - Added --installcuda option to install.sh to install cuda via a package manager. Can be used with new --installcudaversion option to specify which version of cuda to install. ### Fixed - Fixed #includes to support a compiler version. - Fixed client dependency support in install.sh ## hipBLAS 0.51.0 for ROCm 5.2.0 ### Added - Packages for test and benchmark executables on all supported OSes using CPack. - Added File/Folder Reorg Changes with backward compatibility support enabled using ROCM-CMAKE wrapper functions - Added user-specified initialization option to hipblas-bench ### Fixed - Fixed version gathering in performance measuring script ## hipBLAS 0.50.0 for ROCm 5.1.0 ### Added - Added library version and device information to hipblas-test output - Added --rocsolver-path command line option to choose path to pre-built rocSOLVER, as absolute or relative path - Added --cmake_install command line option to update cmake to minimum version if required - Added cmake-arg parameter to pass in cmake arguments while building - Added infrastructure to support readthedocs hipBLAS documentation. ### Fixed - Added hipblasVersionMinor define. hipblaseVersionMinor remains defined for backwards compatibility. - Doxygen warnings in hipblas.h header file. 
### Changed - rocblas-path command line option can be specified as either absolute or relative path - Help message improvements in install.sh and rmake.py - Updated googletest dependency from 1.10.0 to 1.11.0 ## hipBLAS 0.49.0 for ROCm 5.0.0 ### Added - Added rocSOLVER functions to hipblas-bench - Added option ROCM_MATHLIBS_API_USE_HIP_COMPLEX to opt-in to use hipFloatComplex and hipDoubleComplex - Added compilation warning for future trmm changes - Added documentation to hipblas.h - Added option to forgo pivoting for getrf and getri when ipiv is nullptr - Added code coverage option ### Fixed - Fixed use of incorrect 'HIP_PATH' when building from source. - Fixed windows packaging - Allowing negative increments in hipblas-bench - Removed boost dependency ## hipBLAS 0.48.0 for ROCm 4.5.0 ### Added - Added more support for hipblas-bench - Added HIPBLAS_STATUS_UNKNOWN for unsupported backend status codes ### Fixed - Avoid large offset overflow for gemv and hemv in hipblas-test ### Changed - Packaging split into a runtime package called hipblas and a development package called hipblas-devel. The development package depends on runtime. The runtime package suggests the development package for all supported OSes except CentOS 7 to aid in the transition. The suggests feature in packaging is introduced as a deprecated feature and will be removed in a future rocm release. ## hipBLAS 0.46.0 for ROCm 4.3.0 ### Added - Added hipblasStatusToString ### Fixed - Added catch() blocks around API calls to prevent the leak of C++ exceptions ## hipBLAS 0.44.0 for ROCm 4.2.0 ### Added - Made necessary changes to work with rocBLAS' gemm_ex changes. When using rocBLAS backend, hipBLAS will query the preferable layout of int8 data to be passed to gemm_ex, and will pass in the resulting flag. Users must be sure to use the preferable data format when calling gemm_ex with a rocBLAS backend. - Added hipblas-bench with support for: - copy, swap, scal ## hipBLAS 0.42.0 for ROCm 4.1.0 ### Added - Added the following functions. All added functions include batched and strided-batched support with rocBLAS backend: - axpy_ex - dot_ex - nrm2_ex - rot_ex - scal_ex ### Fixed - Fixed complex unit test bug caused by incorrect caxpy and zaxpy function signatures ## hipBLAS 0.40.0 for ROCm 4.0.0 ### Added - Added changelog - Added hipblas-bench with support for: - gemv, trsm, gemm - Added rocSOLVER as a cpack dependency ## hipBLAS 0.38.0 for ROCm 3.10.0 ### Added - Added hipblasSetAtomicsMode and hipblasGetAtomicsMode - No longer look for CUDA backend unless --cuda build flag is passed ## hipBLAS 0.36.0 for ROCm 3.9.0 ### Added - Make device memory reallocate on demand ## hipBLAS 0.34.0 for ROCm 3.8.0 ### Added - Added --static build flag to allow for creating a static library ## hipBLAS 0.32.0 for ROCm 3.7.0 ### Added - Added --rocblas-path command line option to choose path to pre-built rocBLAS - Added sgetriBatched, dgetriBatched, cgetriBatched, and zgetriBatched - Added TrsmEx, TrsmBatchedEx, and TrsmStridedBatchedEx - Added hipblasSetVectorAsync and hipblasGetVectorAsync - Added hipblasSetMatrixAsync and hipblasGetMatrixAsync - Added Fortran support for getrf, getrs, geqrf and all variants thereof ## hipBLAS 0.30.0 for ROCm 3.6.0 ### Added - Added the following functions. 
All added functions include batched and strided-batched support with rocBLAS backend: - stbsv, dtbsv, ctbsv, ztbsv - ssymm, dsymm, csymm, zsymm - cgeam, zgeam - chemm, zhemm - strtri, dtrtri, ctrtri, ztrtri - sdgmm, ddgmm, cdgmm, zdgmm - Added GemmBatchedEx and GemmStridedBatchedEx - Added Fortran support for BLAS functions ## hipBLAS 0.28.0 for ROCm 3.5.0 ### Added - Added the following functions. All added functions include batched and strided-batched support with rocBLAS backend: - sgbmv, dgbmv, cgbmv, zgbmv - chemv, zhemv - stbmv, dtbmv, ctbmv, ztbmv - strmv, trmv, ctrmv, ztrmv - chbmv, zhbmv - cher, zher - cher2, zher2 - chpmv, zhpmv - chpr, zhpr - chpr2, zhpr2 - ssbmv, dsbmv - sspmv, dspmv - ssymv, dsymv, csymv, zsymv - stpmv, dtpmv, ctpmv, ztpmv - cgeru, cgerc, zgeru, zgerc - sspr, dspr, cspr, zspr - sspr2, dspr2 - csyr, zsyr - ssyr2, dsyr2, csyr2, zsyr2 - stpsv, dtpsv, ctpsv, ztpsv - ctrsv, ztrsv - cherk, zherk - cherkx, zherkx - cher2k, zher2k - ssyrk, dsyrk, csyrk, zsyrk - ssyr2k, dsyr2k, csyr2k, zsyr2k - ssyrkx, dsyrkx, csyrkx, zsyrkx - ctrmm, ztrmm - ctrsm, ztrsm hipBLAS-rocm-5.5.1/CMakeLists.txt000066400000000000000000000253541434647641600164720ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # ######################################################################## # The ROCm platform requires Ubuntu 16.04 or Fedora 24, which has cmake 3.5 cmake_minimum_required( VERSION 3.5 ) # We use C++14 features, this will add compile option: -std=c++14 set( CMAKE_CXX_STANDARD 17 ) if (NOT python) set(python "python3") # default for linux endif() # Consider removing this in the future # This should appear before the project command, because it does not use FORCE if( WIN32 ) set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" ) else( ) set( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories" ) endif( ) # This has to be initialized before the project() command appears # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." 
) endif() if (NOT WIN32) if ( NOT DEFINED CMAKE_Fortran_COMPILER AND NOT DEFINED ENV{FC} ) set( CMAKE_Fortran_COMPILER "gfortran" ) endif() set( fortran_language "Fortran" ) endif( ) project( hipblas LANGUAGES CXX ${fortran_language} ) # This finds the rocm-cmake project, and installs it if not found # rocm-cmake contains common cmake code for rocm projects to help setup and install set( PROJECT_EXTERN_DIR ${CMAKE_CURRENT_BINARY_DIR}/extern ) find_package( ROCM 0.7.3 CONFIG QUIET PATHS /opt/rocm ) if( NOT ROCM_FOUND ) set( rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download" ) file( DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip ) execute_process( COMMAND ${CMAKE_COMMAND} -E tar xzf ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} ) execute_process( COMMAND ${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=${PROJECT_EXTERN_DIR}/rocm-cmake . WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag} ) execute_process( COMMAND ${CMAKE_COMMAND} --build rocm-cmake-${rocm_cmake_tag} --target install WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}) find_package( ROCM 0.7.3 REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake ) endif( ) include( ROCMSetupVersion ) include( ROCMCreatePackage ) include( ROCMInstallTargets ) include( ROCMPackageConfigHelpers ) include( ROCMInstallSymlinks ) include( ROCMClients ) include( ROCMHeaderWrapper ) set ( VERSION_STRING "0.54.0" ) rocm_setup_version( VERSION ${VERSION_STRING} ) # Append our library helper cmake path and the cmake path for hip (for convenience) # Users may override HIP path by specifying their own in CMAKE_MODULE_PATH list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${ROCM_PATH}/lib/cmake/hip /opt/rocm/lib/cmake/hip ${HIP_DIR}/cmake ) # NOTE: workaround until hip cmake modules fixes symlink logic in their config files; remove when fixed list( APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/llvm /opt/rocm/hip ) option( BUILD_VERBOSE "Output additional build information" OFF ) option( BUILD_WITH_SOLVER "Add additional functions from rocSOLVER" ON ) if( BUILD_WITH_SOLVER ) add_definitions( -D__HIP_PLATFORM_SOLVER__ ) endif( ) # BUILD_SHARED_LIBS is a cmake built-in; we make it an explicit option such that it shows in cmake-gui option( BUILD_SHARED_LIBS "Build hipBLAS as a shared library" ON ) # Find CUDA if the user wants a CUDA version. 
option(USE_CUDA "Look for CUDA and use that as a backend if found" OFF) if (USE_CUDA) find_package( CUDA REQUIRED ) endif() # Hip headers required of all clients; clients use hip to allocate device memory if( USE_CUDA) find_package( HIP MODULE REQUIRED ) else( ) find_package( hip REQUIRED CONFIG PATHS ${HIP_DIR} ${ROCM_PATH} /opt/rocm) endif( ) if( USE_CUDA ) list( APPEND HIP_INCLUDE_DIRS "${HIP_ROOT_DIR}/include" ) endif( ) option(BUILD_CODE_COVERAGE "Build with code coverage enabled" OFF) if(BUILD_CODE_COVERAGE) add_compile_options(-fprofile-arcs -ftest-coverage) add_link_options(--coverage) endif() option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF) if(BUILD_ADDRESS_SANITIZER) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") endif() # FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" ON) if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32) rocm_wrap_header_dir( ${CMAKE_SOURCE_DIR}/library/include PATTERNS "*.h" GUARDS SYMLINK WRAPPER WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR} ) endif() add_subdirectory( library ) include( clients/cmake/build-options.cmake ) # Build clients of the library if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS ) if(NOT CLIENTS_OS) rocm_set_os_id(CLIENTS_OS) string(TOLOWER "${CLIENTS_OS}" CLIENTS_OS) rocm_read_os_release(CLIENTS_OS_VERSION VERSION_ID) endif() message(STATUS "OS: ${CLIENTS_OS} ${CLIENTS_OS_VERSION}") set(GFORTRAN_RPM "libgfortran4") set(GFORTRAN_DEB "libgfortran4") if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel") if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8") set(GFORTRAN_RPM "libgfortran") endif() elseif(CLIENTS_OS STREQUAL "ubuntu" AND CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "20.04") set(GFORTRAN_DEB "libgfortran5") endif() set( BUILD_CLIENTS ON ) rocm_package_setup_component(clients) rocm_package_setup_client_component(clients-common) if(BUILD_CLIENTS_TESTS) rocm_package_setup_client_component( tests DEPENDS COMPONENT clients-common DEB "${GFORTRAN_DEB}" RPM "${GFORTRAN_RPM}") endif() if(BUILD_CLIENTS_BENCHMARKS) rocm_package_setup_client_component( benchmarks DEPENDS COMPONENT clients-common DEB "${GFORTRAN_DEB}" RPM "${GFORTRAN_RPM}") endif() add_subdirectory( clients ) endif( ) # The following code is setting variables to control the behavior of CPack to generate our if( WIN32 ) set( CPACK_SOURCE_GENERATOR "ZIP" ) set( CPACK_GENERATOR "ZIP" ) endif( ) # Package specific CPACK vars if( NOT USE_CUDA ) rocm_package_add_dependencies(DEPENDS "rocblas >= 2.47.0" "rocsolver >= 3.21.0") endif( ) set( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" ) set( CPACK_RPM_PACKAGE_LICENSE "MIT") if (WIN32) SET( CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE ) SET( INSTALL_PREFIX "C:/hipSDK" ) SET( CPACK_SET_DESTDIR FALSE ) SET( CPACK_PACKAGE_INSTALL_DIRECTORY "C:/hipSDK" ) SET( CPACK_PACKAGING_INSTALL_PREFIX "" ) set( CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF ) else() if( NOT CPACK_PACKAGING_INSTALL_PREFIX ) set( CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" ) endif() endif( ) set( CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" ) # Give hipblas compiled for CUDA backend a different name if( NOT USE_CUDA ) set( package_name hipblas ) else( ) set( package_name 
hipblas-alt ) endif( ) set( HIPBLAS_CONFIG_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" CACHE PATH "Path placed into ldconfig file" ) rocm_create_package( NAME ${package_name} DESCRIPTION "Radeon Open Compute BLAS marshalling library" MAINTAINER "hipBLAS Maintainer " LDCONFIG LDCONFIG_DIR ${HIPBLAS_CONFIG_DIR} ) # # ADDITIONAL TARGETS FOR CODE COVERAGE # if(BUILD_CODE_COVERAGE) # # > make coverage_cleanup (clean coverage related files.) # > make coverage GTEST_FILTER=<> # will run: # > make coverage_analysis GTEST_FILTER=<> (analyze tests) # > make coverage_output (generate html documentation) # # # Run coverage analysis # set(coverage_test ./clients/staging/hipblas-test) if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(coverage_test ./clients/staging/hipblas-test-d) endif() add_custom_target(coverage_analysis COMMAND echo Coverage GTEST_FILTER=\${GTEST_FILTER} COMMAND ${coverage_test} --gtest_filter=\"\${GTEST_FILTER}\" WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) add_dependencies(coverage_analysis hipblas) # # Prepare coverage output # This little script is generated because the option '--gcov-tool ' of lcov cannot take arguments. # add_custom_target(coverage_output DEPENDS coverage_analysis COMMAND mkdir -p lcoverage COMMAND echo "\\#!/bin/bash" > llvm-gcov.sh COMMAND echo "\\# THIS FILE HAS BEEN GENERATED" >> llvm-gcov.sh COMMAND printf "exec /opt/rocm/llvm/bin/llvm-cov gcov $$\\@" >> llvm-gcov.sh COMMAND chmod +x llvm-gcov.sh ) # # Generate coverage output. # add_custom_command(TARGET coverage_output COMMAND lcov --directory . --base-directory . --gcov-tool ${CMAKE_BINARY_DIR}/llvm-gcov.sh --capture -o lcoverage/raw_main_coverage.info COMMAND lcov --remove lcoverage/raw_main_coverage.info "'/opt/*'" "'/usr/*'" -o lcoverage/main_coverage.info COMMAND genhtml lcoverage/main_coverage.info --output-directory lcoverage ) add_custom_target(coverage DEPENDS coverage_output) # # Coverage cleanup # add_custom_target(coverage_cleanup COMMAND find ${CMAKE_BINARY_DIR} -name *.gcda -delete WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) endif() hipBLAS-rocm-5.5.1/LICENSE.md000066400000000000000000000021271434647641600153270ustar00rootroot00000000000000MIT License Copyright (C) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. hipBLAS-rocm-5.5.1/README.md000066400000000000000000000062671434647641600152130ustar00rootroot00000000000000# **hipBLAS** **hipBLAS** is a Basic Linear Algebra Subprograms (**BLAS**) marshalling library, with multiple supported backends. 
It sits between the application and a 'worker' BLAS library, marshalling inputs into the backend library and marshalling results back to the application. **hipBLAS** exports an interface that does not require the client to change, regardless of the chosen backend. Currently, **hipBLAS** supports **rocBLAS** and **cuBLAS** as backends.

## Documentation

For a detailed description of the **hipBLAS** library, its implemented routines, the installation process, and the user guide, see the [**hipBLAS** Documentation](https://hipblas.readthedocs.io/en/latest/).

hipBLAS requires either the **rocBLAS** + **rocSOLVER** or the **cuBLAS** APIs for its BLAS implementation. For more information on the dependent **roc*** libraries, see the [rocBLAS documentation](https://rocblas.readthedocs.io/en/latest/) and the [rocSOLVER documentation](https://rocsolver.readthedocs.io/en/latest/).

## Quickstart build

To download the **hipBLAS** source code, use the following command to clone the repository:

```bash
git clone https://github.com/ROCmSoftwarePlatform/hipBLAS.git
```

**hipBLAS** requires specific versions of **rocBLAS** and **rocSOLVER** to be installed on the system. The **rocBLAS** and **rocSOLVER** versions required to build **hipBLAS** are listed [here](https://github.com/ROCmSoftwarePlatform/hipBLAS/blob/develop/library/CMakeLists.txt). Once the dependent libraries are installed, the following command will build hipBLAS and install to `/opt/rocm/hipblas`:

```bash
cd hipblas
./install.sh -i
```

## hipBLAS interface examples

The hipBLAS interface is compatible with the rocBLAS and cuBLAS-v2 APIs. Porting a CUDA application that originally calls the cuBLAS API to an application calling the hipBLAS API should be relatively straightforward. For example, the hipBLAS SGEMV interface is

### GEMV API

```c
hipblasStatus_t hipblasSgemv( hipblasHandle_t handle,
                              hipblasOperation_t trans,
                              int m, int n, const float *alpha,
                              const float *A, int lda,
                              const float *x, int incx,
                              const float *beta,
                              float *y, int incy );
```

### Batched and strided GEMM API

hipBLAS GEMM can process matrices in batches with regular strides. There are several permutations of these APIs; the following is an example that takes all parameters:

```c
hipblasStatus_t hipblasSgemmStridedBatched( hipblasHandle_t handle,
                                            hipblasOperation_t transa, hipblasOperation_t transb,
                                            int m, int n, int k,
                                            const float *alpha,
                                            const float *A, int lda, long long bsa,
                                            const float *B, int ldb, long long bsb,
                                            const float *beta,
                                            float *C, int ldc, long long bsc,
                                            int batchCount);
```

hipBLAS assumes matrices A and vectors x, y are allocated in GPU memory space and filled with data. Users are responsible for copying data from/to the host and device memory.

## Supported functionality

For a complete list of all supported functions, see the [hipBLAS user guide](https://hipblas.readthedocs.io/en/latest/usermanual.html) and [hipBLAS functions](https://hipblas.readthedocs.io/en/latest/functions.html#hipblas-functions).

hipBLAS-rocm-5.5.1/bump_master_version.sh000077500000000000000000000015201434647641600203410ustar00rootroot00000000000000#!/bin/bash # This script needs to be edited to bump old develop version to new master version for new release.
# - run this script in develop branch # - after running this script merge develop into master # - after running this script and merging develop into master, run bump_develop_version.sh in master and # merge master into develop # OLD_HIPBLAS_VERSION="0.45.0" # NEW_HIPBLAS_VERSION="0.46.0" # OLD_MINIMUM_ROCBLAS_VERSION="2.38.0" # NEW_MINIMUM_ROCBLAS_VERSION="2.39.0" # OLD_MINIMUM_ROCSOLVER_VERSION="3.13.0" # NEW_MINIMUM_ROCSOLVER_VERSION="3.13.0" # sed -i "s/${OLD_HIPBLAS_VERSION}/${NEW_HIPBLAS_VERSION}/g" CMakeLists.txt # sed -i "s/${OLD_MINIMUM_ROCBLAS_VERSION}/${NEW_MINIMUM_ROCBLAS_VERSION}/g" library/CMakeLists.txt # sed -i "s/${OLD_MINIMUM_ROCSOLVER_VERSION}/${NEW_MINIMUM_ROCSOLVER_VERSION}/g" library/CMakeLists.txt hipBLAS-rocm-5.5.1/bump_staging_version.sh000077500000000000000000000013671434647641600205130ustar00rootroot00000000000000#!/bin/bash # This script needs to be edited to bump new master version to new develop for new release. # - run this script after running bump_master_version.sh and merging develop into master # - run this script in master branch # - after running this script merge master into develop OLD_HIPBLAS_VERSION="0.54.0" NEW_HIPBLAS_VERSION="0.55.0" OLD_MINIMUM_ROCBLAS_VERSION="2.47.0" NEW_MINIMUM_ROCBLAS_VERSION="2.48.0" OLD_MINIMUM_ROCSOLVER_VERSION="3.21.0" NEW_MINIMUM_ROCSOLVER_VERSION="3.22.0" sed -i "s/${OLD_HIPBLAS_VERSION}/${NEW_HIPBLAS_VERSION}/g" CMakeLists.txt sed -i "s/${OLD_MINIMUM_ROCBLAS_VERSION}/${NEW_MINIMUM_ROCBLAS_VERSION}/g" CMakeLists.txt sed -i "s/${OLD_MINIMUM_ROCSOLVER_VERSION}/${NEW_MINIMUM_ROCSOLVER_VERSION}/g" CMakeLists.txt hipBLAS-rocm-5.5.1/clients/000077500000000000000000000000001434647641600153625ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/clients/CMakeLists.txt000066400000000000000000000140111434647641600201170ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# ######################################################################## # The ROCm platform requires Ubuntu 16.04 or Fedora 24, which has cmake 3.5 cmake_minimum_required( VERSION 3.5 ) # Consider removing this in the future # This should appear before the project command, because it does not use FORCE if( WIN32 ) set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" ) else( ) set( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories" ) endif( ) # This has to be initialized before the project() command appears # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." ) endif() # This project may compile dependencies for clients project( hipblas-clients LANGUAGES CXX Fortran ) # We use C++14 features, this will add compile option: -std=c++14 set( CMAKE_CXX_STANDARD 17 ) list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ) include( build-options ) if( NOT WIN32 ) set(hipblas_f90_source_clients include/hipblas_fortran.f90 ) set(hipblas_f90_source_clients_solver include/hipblas_fortran_solver.f90) endif() if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_SAMPLES ) if( NOT WIN32 ) if( BUILD_WITH_SOLVER ) add_library(hipblas_fortran_client ${hipblas_f90_source_clients} ${hipblas_f90_source_clients_solver}) else() add_library(hipblas_fortran_client ${hipblas_f90_source_clients}) endif() rocm_install(TARGETS hipblas_fortran_client COMPONENT clients-common) add_dependencies(hipblas_fortran_client hipblas_fortran) endif() include_directories(${CMAKE_BINARY_DIR}/include/hipblas) include_directories(${CMAKE_BINARY_DIR}/include) endif( ) if( BUILD_CLIENTS_SAMPLES ) add_subdirectory( samples ) endif( ) if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS) if( NOT WIN32 ) # Linking lapack library requires fortran flags find_package( cblas REQUIRED CONFIG ) if (LINK_BLIS) set( BLIS_INCLUDE_DIR ${BUILD_DIR}/deps/blis/include/blis ) set( BLIS_CPP ../common/blis_interface.cpp ) set( BLAS_LIBRARY ${BUILD_DIR}/deps/blis/lib/libblis.so ) else() set( BLAS_LIBRARY "blas" ) endif() else() # WIN32 set( BLAS_INCLUDE_DIR ${OPENBLAS_DIR}/include CACHE PATH "OpenBLAS library include path" ) find_library( BLAS_LIBRARY libopenblas PATHS ${OPENBLAS_DIR}/lib REQUIRED NO_DEFAULT_PATH ) if (NOT BLAS_LIBRARY) find_package( OPENBLAS CONFIG REQUIRED ) set( BLAS_LIBRARY OpenBLAS::OpenBLAS ) set( BLAS_INCLUDE_DIR "" ) endif() endif() if( BUILD_CLIENTS_TESTS ) add_subdirectory( gtest ) endif( ) if( BUILD_CLIENTS_BENCHMARKS ) add_subdirectory( benchmarks ) endif( ) endif() set( HIPBLAS_COMMON "${PROJECT_BINARY_DIR}/staging/hipblas_common.yaml") add_custom_command( OUTPUT "${HIPBLAS_COMMON}" COMMAND ${CMAKE_COMMAND} -E copy include/hipblas_common.yaml "${HIPBLAS_COMMON}" DEPENDS include/hipblas_common.yaml WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" ) set( HIPBLAS_TEMPLATE "${PROJECT_BINARY_DIR}/staging/hipblas_template.yaml") add_custom_command( OUTPUT "${HIPBLAS_TEMPLATE}" COMMAND ${CMAKE_COMMAND} -E copy include/hipblas_template.yaml "${HIPBLAS_TEMPLATE}" DEPENDS include/hipblas_template.yaml WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" ) set( HIPBLAS_SMOKE 
"${PROJECT_BINARY_DIR}/staging/hipblas_smoke.yaml") add_custom_command( OUTPUT "${HIPBLAS_SMOKE}" COMMAND ${CMAKE_COMMAND} -E copy include/hipblas_smoke.yaml "${HIPBLAS_SMOKE}" DEPENDS include/hipblas_smoke.yaml WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" ) set( HIPBLAS_GENTEST "${PROJECT_BINARY_DIR}/staging/hipblas_gentest.py") add_custom_command( OUTPUT "${HIPBLAS_GENTEST}" COMMAND ${CMAKE_COMMAND} -E copy common/hipblas_gentest.py "${HIPBLAS_GENTEST}" DEPENDS common/hipblas_gentest.py WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" ) add_custom_target( hipblas-common DEPENDS "${HIPBLAS_COMMON}" "${HIPBLAS_TEMPLATE}" "${HIPBLAS_SMOKE}" "${HIPBLAS_GENTEST}" ) rocm_install( FILES ${HIPBLAS_COMMON} ${HIPBLAS_TEMPLATE} ${HIPBLAS_SMOKE} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clients-common ) rocm_install( PROGRAMS ${HIPBLAS_GENTEST} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clients-common ) hipBLAS-rocm-5.5.1/clients/benchmarks/000077500000000000000000000000001434647641600174775ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/clients/benchmarks/CMakeLists.txt000066400000000000000000000102761434647641600222450ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# ######################################################################## set( THREADS_PREFER_PTHREAD_FLAG ON ) find_package( Threads REQUIRED ) # Linking lapack library requires fortran flags enable_language( Fortran ) set(hipblas_bench_source client.cpp) if( NOT TARGET hipblas ) find_package( hipblas REQUIRED CONFIG PATHS /opt/rocm/hipblas ) endif( ) set( hipblas_benchmark_common ../common/utility.cpp ../common/cblas_interface.cpp ../common/clients_common.cpp ../common/hipblas_arguments.cpp ../common/hipblas_parse_data.cpp ../common/hipblas_datatype2string.cpp ../common/norm.cpp ../common/unit.cpp ../common/near.cpp ../common/arg_check.cpp ../common/argument_model.cpp ../common/hipblas_template_specialization.cpp ${BLIS_CPP} ) add_executable( hipblas-bench ${hipblas_bench_source} ${hipblas_benchmark_common} ) target_compile_features( hipblas-bench PRIVATE cxx_static_assert cxx_nullptr cxx_auto_type ) if(LINK_BLIS) target_link_libraries( hipblas-bench PRIVATE ${BLIS_LIBRARY} ) elseif(NOT WIN32) target_link_libraries( hipblas-bench PRIVATE blas ) endif() # Internal header includes target_include_directories( hipblas-bench PRIVATE $ ) # External header includes included as system files target_include_directories( hipblas-bench SYSTEM PRIVATE $ $ $ $ ) if (NOT WIN32) target_link_libraries( hipblas-bench PRIVATE hipblas_fortran_client lapack cblas stdc++fs ) endif() target_link_libraries( hipblas-bench PRIVATE ${BLAS_LIBRARY} roc::hipblas Threads::Threads ) # need mf16c flag for float->half convertion target_compile_options( hipblas-bench PRIVATE -mf16c ) # -Wno-deprecated-declarations ) target_compile_definitions( hipblas-bench PRIVATE HIPBLAS_BENCH HIPBLAS_BFLOAT16_CLASS ROCM_USE_FLOAT16 ) if( NOT USE_CUDA ) target_link_libraries( hipblas-bench PRIVATE hip::host ) if( CUSTOM_TARGET ) target_link_libraries( hipblas-bench PRIVATE hip::${CUSTOM_TARGET} ) endif() if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) # hip-clang needs specific flag to turn on pthread and m target_link_libraries( hipblas-bench PRIVATE -lpthread -lm ) if(BUILD_ADDRESS_SANITIZER) target_link_libraries( hipblas-bench PRIVATE -fuse-ld=lld -lgfortran ) endif() endif() else( ) target_compile_definitions( hipblas-bench PRIVATE __HIP_PLATFORM_NVCC__ ) target_include_directories( hipblas-bench PRIVATE $ ) target_link_libraries( hipblas-bench PRIVATE ${CUDA_LIBRARIES} ) endif( ) set_target_properties( hipblas-bench PROPERTIES DEBUG_POSTFIX "-d" CXX_EXTENSIONS OFF RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) add_dependencies( hipblas-bench hipblas-common ) rocm_install(TARGETS hipblas-bench COMPONENT benchmarks) hipBLAS-rocm-5.5.1/clients/benchmarks/client.cpp000066400000000000000000000375451434647641600214770ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "program_options.hpp" #include "hipblas.hpp" #include "argument_model.hpp" #include "clients_common.hpp" #include "hipblas_data.hpp" #include "hipblas_datatype2string.hpp" #include "hipblas_parse_data.hpp" #include "test_cleanup.hpp" #include "type_dispatch.hpp" #include "utility.h" #include #include #include #include #include #include #include #include #include #include using namespace roc; // For emulated program_options typedef int hipblas_int; int hipblas_bench_datafile() { int ret = 0; for(Arguments arg : HipBLAS_TestData()) ret |= run_bench_test(arg, 0, 1); test_cleanup::cleanup(); return ret; } void thread_init_device(int id, const Arguments& arg) { int count; CHECK_HIP_ERROR(hipGetDeviceCount(&count)); if(id < count) CHECK_HIP_ERROR(hipSetDevice(id)); Arguments a(arg); a.cold_iters = 1; a.iters = 0; run_bench_test(a, 0, 1); } void thread_run_bench(int id, const Arguments& arg) { int count; CHECK_HIP_ERROR(hipGetDeviceCount(&count)); if(id < count) CHECK_HIP_ERROR(hipSetDevice(id)); Arguments a(arg); run_bench_test(a, 0, 1); } int run_bench_multi_gpu_test(int parallel_devices, Arguments& arg) { int count; CHECK_HIP_ERROR(hipGetDeviceCount(&count)); if(parallel_devices > count || parallel_devices < 1) return 1; // initialization auto thread_init = std::make_unique(parallel_devices); for(int id = 0; id < parallel_devices; ++id) thread_init[id] = std::thread(::thread_init_device, id, arg); for(int id = 0; id < parallel_devices; ++id) thread_init[id].join(); // synchronzied launch of cold & hot calls auto thread = std::make_unique(parallel_devices); for(int id = 0; id < parallel_devices; ++id) thread[id] = std::thread(::thread_run_bench, id, arg); for(int id = 0; id < parallel_devices; ++id) thread[id].join(); return 0; } // Replace --batch with --batch_count for backward compatibility void fix_batch(int argc, char* argv[]) { static char b_c[] = "--batch_count"; for(int i = 1; i < argc; ++i) if(!strcmp(argv[i], "--batch")) { static int once = (std::cerr << argv[0] << " warning: --batch is deprecated, and --batch_count " "should be used instead." << std::endl, 0); argv[i] = b_c; } } int main(int argc, char* argv[]) try { fix_batch(argc, argv); Arguments arg; std::string function; std::string precision; std::string a_type; std::string b_type; std::string c_type; std::string d_type; std::string compute_type; std::string initialization; hipblas_int device_id; hipblas_int parallel_devices; bool datafile = hipblas_parse_data(argc, argv); bool atomics_not_allowed = false; bool log_function_name = false; bool log_datatype = false; options_description desc("hipblas-bench command line options"); // clang-format off desc.add_options() ("sizem,m", value(&arg.M)->default_value(128), "Specific matrix size: sizem is only applicable to BLAS-2 & BLAS-3: the number of " "rows or columns in matrix.") ("sizen,n", value(&arg.N)->default_value(128), "Specific matrix/vector size: BLAS-1: the length of the vector. 
BLAS-2 & " "BLAS-3: the number of rows or columns in matrix") ("sizek,k", value(&arg.K)->default_value(128), "Specific matrix size: BLAS-2: the number of sub or super-diagonals of A. BLAS-3: " "the number of columns in A and rows in B.") ("kl", value(&arg.KL)->default_value(128), "Specific matrix size: kl is only applicable to BLAS-2: The number of sub-diagonals " "of the banded matrix A.") ("ku", value(&arg.KU)->default_value(128), "Specific matrix size: ku is only applicable to BLAS-2: The number of super-diagonals " "of the banded matrix A.") ("lda", value(&arg.lda)->default_value(128), "Leading dimension of matrix A, is only applicable to BLAS-2 & BLAS-3.") ("ldb", value(&arg.ldb)->default_value(128), "Leading dimension of matrix B, is only applicable to BLAS-2 & BLAS-3.") ("ldc", value(&arg.ldc)->default_value(128), "Leading dimension of matrix C, is only applicable to BLAS-2 & BLAS-3.") ("ldd", value(&arg.ldd)->default_value(128), "Leading dimension of matrix D, is only applicable to BLAS-EX ") ("stride_a", value(&arg.stride_a)->default_value(128*128), "Specific stride of strided_batched matrix A, is only applicable to strided batched " "BLAS-2 and BLAS-3: second dimension * leading dimension.") ("stride_b", value(&arg.stride_b)->default_value(128*128), "Specific stride of strided_batched matrix B, is only applicable to strided batched " "BLAS-2 and BLAS-3: second dimension * leading dimension.") ("stride_c", value(&arg.stride_c)->default_value(128*128), "Specific stride of strided_batched matrix C, is only applicable to strided batched " "BLAS-2 and BLAS-3: second dimension * leading dimension.") ("stride_d", value(&arg.stride_d)->default_value(128*128), "Specific stride of strided_batched matrix D, is only applicable to strided batched " "BLAS_EX: second dimension * leading dimension.") ("stride_x", value(&arg.stride_x)->default_value(128), "Specific stride of strided_batched vector x, is only applicable to strided batched " "BLAS_2: second dimension.") ("stride_y", value(&arg.stride_y)->default_value(128), "Specific stride of strided_batched vector y, is only applicable to strided batched " "BLAS_2: leading dimension.") ("incx", value(&arg.incx)->default_value(1), "increment between values in x vector") ("incy", value(&arg.incy)->default_value(1), "increment between values in y vector") ("alpha", value(&arg.alpha)->default_value(1.0), "specifies the scalar alpha") ("alphai", value(&arg.alphai)->default_value(0.0), "specifies the imaginary part of the scalar alpha") ("beta", value(&arg.beta)->default_value(0.0), "specifies the scalar beta") ("betai", value(&arg.betai)->default_value(0.0), "specifies the imaginary part of the scalar beta") ("function,f", value(&function), "BLAS function to test.") ("precision,r", value(&precision)->default_value("f32_r"), "Precision. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("a_type", value(&a_type), "Precision of matrix A. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("b_type", value(&b_type), "Precision of matrix B. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("c_type", value(&c_type), "Precision of matrix C. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("d_type", value(&d_type), "Precision of matrix D. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("compute_type", value(&compute_type), "Precision of computation. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("initialization", value(&initialization)->default_value("hpl"), "Initialize with random integers, trig functions sin and cos, or hpl-like input. " "Options: rand_int, trig_float, hpl") ("transposeA", value(&arg.transA)->default_value('N'), "N = no transpose, T = transpose, C = conjugate transpose") ("transposeB", value(&arg.transB)->default_value('N'), "N = no transpose, T = transpose, C = conjugate transpose") ("side", value(&arg.side)->default_value('L'), "L = left, R = right. Only applicable to certain routines") ("uplo", value(&arg.uplo)->default_value('U'), "U = upper, L = lower. Only applicable to certain routines") // xsymv xsyrk xsyr2k xtrsm xtrsm_ex // xtrmm xtrsv ("diag", value(&arg.diag)->default_value('N'), "U = unit diagonal, N = non unit diagonal. Only applicable to certain routines") // xtrsm xtrsm_ex xtrsv xtrmm ("batch_count", value(&arg.batch_count)->default_value(1), "Number of matrices. Only applicable to batched and strided_batched routines") ("verify,v", value(&arg.norm_check)->default_value(0), "Validate GPU results with CPU? 0 = No, 1 = Yes (default: No)") ("iters,i", value(&arg.iters)->default_value(10), "Iterations to run inside timing loop") ("cold_iters,j", value(&arg.cold_iters)->default_value(2), "Cold Iterations to run before entering the timing loop") ("algo", value(&arg.algo)->default_value(0), "extended precision gemm algorithm") ("solution_index", value(&arg.solution_index)->default_value(0), "extended precision gemm solution index") ("flags", value(&arg.flags)->default_value(0), "gemm_ex flags") ("atomics_not_allowed", bool_switch(&atomics_not_allowed)->default_value(false), "Atomic operations with non-determinism in results are not allowed") ("device", value(&device_id)->default_value(0), "Set default device to be used for subsequent program runs") ("parallel_devices", value(&parallel_devices)->default_value(0), "Set number of devices used for parallel runs (device 0 to parallel_devices-1)") // ("c_noalias_d", // bool_switch(&arg.c_noalias_d)->default_value(false), // "C and D are stored in separate memory") ("log_function_name", bool_switch(&log_function_name)->default_value(false), "Function name precedes other items.") ("log_datatype", bool_switch(&log_datatype)->default_value(false), "Include datatypes used in output.") ("fortran", bool_switch(&arg.fortran)->default_value(false), "Run using Fortran interface") ("help,h", "produces this help message"); //("version", "Prints the version number"); // clang-format on variables_map vm; store(parse_command_line(argc, argv, desc), vm); notify(vm); if((argc <= 1 && !datafile) || vm.count("help")) { std::cout << desc << std::endl; return 0; } // if(vm.find("version") != vm.end()) // { // char blas_version[100]; // hipblas_get_version_string(blas_version, sizeof(blas_version)); // std::cout << "hipBLAS version: " << blas_version << std::endl; // return 0; // } // transfer local variable state arg.atomics_mode = atomics_not_allowed ?
HIPBLAS_ATOMICS_NOT_ALLOWED : HIPBLAS_ATOMICS_ALLOWED; ArgumentModel_set_log_function_name(log_function_name); ArgumentModel_set_log_datatype(log_datatype); // Device Query hipblas_int device_count = query_device_property(); std::cout << std::endl; if(device_count <= device_id) throw std::invalid_argument("Invalid Device ID"); set_device(device_id); if(datafile) return hipblas_bench_datafile(); std::transform(precision.begin(), precision.end(), precision.begin(), ::tolower); auto prec = string2hipblas_datatype(precision); if(prec == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --precision " + precision); arg.a_type = a_type == "" ? prec : string2hipblas_datatype(a_type); if(arg.a_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --a_type " + a_type); arg.b_type = b_type == "" ? prec : string2hipblas_datatype(b_type); if(arg.b_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --b_type " + b_type); arg.c_type = c_type == "" ? prec : string2hipblas_datatype(c_type); if(arg.c_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --c_type " + c_type); arg.d_type = d_type == "" ? prec : string2hipblas_datatype(d_type); if(arg.d_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --d_type " + d_type); arg.compute_type = compute_type == "" ? prec : string2hipblas_datatype(compute_type); if(arg.compute_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --compute_type " + compute_type); arg.initialization = string2hipblas_initialization(initialization); if(arg.initialization == static_cast(0)) // invalid enum throw std::invalid_argument("Invalid value for --initialization " + initialization); if(arg.M < 0) throw std::invalid_argument("Invalid value for -m " + std::to_string(arg.M)); if(arg.N < 0) throw std::invalid_argument("Invalid value for -n " + std::to_string(arg.N)); if(arg.K < 0) throw std::invalid_argument("Invalid value for -k " + std::to_string(arg.K)); int copied = snprintf(arg.function, sizeof(arg.function), "%s", function.c_str()); if(copied <= 0 || copied >= sizeof(arg.function)) throw std::invalid_argument("Invalid value for --function"); if(!parallel_devices) return run_bench_test(arg, 0, 1); else return run_bench_multi_gpu_test(parallel_devices, arg); } catch(const std::invalid_argument& exp) { std::cerr << exp.what() << std::endl; return -1; } hipBLAS-rocm-5.5.1/clients/cmake/000077500000000000000000000000001434647641600164425ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/clients/cmake/build-options.cmake000066400000000000000000000017531434647641600222420ustar00rootroot00000000000000# ######################################################################## # Copyright 2016-2020 Advanced Micro Devices, Inc. # ######################################################################## # This file is intended to be used in two ways; independently in a stand alone PROJECT # and as part of a superbuild. If the file is included in a stand alone project, the # variables are not expected to be preset, and this will produce options() in the GUI # for the user to examine. If this file is included in a superbuild, the options will be # presented in the superbuild GUI, but then passed into the ExternalProject as -D # parameters, which would already define them. 
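#
# Illustrative sketch only (the external-project name and source path below are
# hypothetical placeholders, not taken from this repository): it shows one way a
# superbuild can pre-define these cache variables with -D arguments, so that the
# option() calls that follow simply keep the values that were passed in.
#
# include( ExternalProject )
# ExternalProject_Add( hipblas-clients                           # hypothetical superbuild target
#   SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/hipBLAS/clients       # hypothetical source location
#   CMAKE_ARGS -DBUILD_CLIENTS_TESTS=ON                          # already in the cache, so option() leaves it alone
#              -DBUILD_CLIENTS_BENCHMARKS=ON
#              -DBUILD_CLIENTS_SAMPLES=OFF )
#
# A stand-alone configure achieves the same effect from the command line, e.g.:
#   cmake -DBUILD_CLIENTS_TESTS=ON -DBUILD_CLIENTS_BENCHMARKS=ON <path-to-clients>
#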
if( NOT BUILD_CLIENTS_TESTS ) option( BUILD_CLIENTS_TESTS "Build hipBLAS unit tests" OFF ) endif( ) if( NOT BUILD_CLIENTS_BENCHMARKS ) option( BUILD_CLIENTS_BENCHMARKS "Build hipBLAS benchmarks" OFF ) endif( ) if( NOT BUILD_CLIENTS_SAMPLES ) option( BUILD_CLIENTS_SAMPLES "Build hipBLAS samples" OFF ) endif( ) hipBLAS-rocm-5.5.1/clients/common/000077500000000000000000000000001434647641600166525ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/clients/common/arg_check.cpp000066400000000000000000000031571434647641600212720ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "arg_check.h" #include "hipblas.h" #include void verify_hipblas_status_invalid_value(hipblasStatus_t status, const char* message) { #ifdef GOOGLE_TEST ASSERT_EQ(status, HIPBLAS_STATUS_INVALID_VALUE); #endif if(status != HIPBLAS_STATUS_INVALID_VALUE) { std::cout << message << std::endl; } } hipBLAS-rocm-5.5.1/clients/common/argument_model.cpp000066400000000000000000000034571434647641600223710ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * ************************************************************************ */ #include "argument_model.hpp" // this should have been a member variable but due to the complex variadic template this singleton allows global control static bool log_function_name = false; void ArgumentModel_set_log_function_name(bool f) { log_function_name = f; } bool ArgumentModel_get_log_function_name() { return log_function_name; } static bool log_datatype = false; void ArgumentModel_set_log_datatype(bool d) { log_datatype = d; } bool ArgumentModel_get_log_datatype() { return log_datatype; } hipBLAS-rocm-5.5.1/clients/common/blis_interface.cpp000066400000000000000000000026571434647641600223410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "blis.h" #include "omp.h" void setup_blis() { #ifndef WIN32 bli_init(); #endif } static int initialize_blis = (setup_blis(), 0); hipBLAS-rocm-5.5.1/clients/common/cblas_interface.cpp000066400000000000000000003667321434647641600225030ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * * ************************************************************************/ #include "cblas_interface.h" #include "cblas.h" #include "hipblas.h" #include "utility.h" #include #include #include /*!\file * \brief provide template functions interfaces to CBLAS C89 interfaces, it is only used for testing * not part of the GPU library */ #ifdef __cplusplus extern "C" { #endif void strtri_(char* uplo, char* diag, int* n, float* A, int* lda, int* info); void dtrtri_(char* uplo, char* diag, int* n, double* A, int* lda, int* info); void ctrtri_(char* uplo, char* diag, int* n, hipblasComplex* A, int* lda, int* info); void ztrtri_(char* uplo, char* diag, int* n, hipblasDoubleComplex* A, int* lda, int* info); void sgetrf_(int* m, int* n, float* A, int* lda, int* ipiv, int* info); void dgetrf_(int* m, int* n, double* A, int* lda, int* ipiv, int* info); void cgetrf_(int* m, int* n, hipblasComplex* A, int* lda, int* ipiv, int* info); void zgetrf_(int* m, int* n, hipblasDoubleComplex* A, int* lda, int* ipiv, int* info); void sgetrs_( char* trans, int* n, int* nrhs, float* A, int* lda, int* ipiv, float* B, int* ldb, int* info); void dgetrs_( char* trans, int* n, int* nrhs, double* A, int* lda, int* ipiv, double* B, int* ldb, int* info); void cgetrs_(char* trans, int* n, int* nrhs, hipblasComplex* A, int* lda, int* ipiv, hipblasComplex* B, int* ldb, int* info); void zgetrs_(char* trans, int* n, int* nrhs, hipblasDoubleComplex* A, int* lda, int* ipiv, hipblasDoubleComplex* B, int* ldb, int* info); void sgetri_(int* n, float* A, int* lda, int* ipiv, float* work, int* lwork, int* info); void dgetri_(int* n, double* A, int* lda, int* ipiv, double* work, int* lwork, int* info); void cgetri_( int* n, hipblasComplex* A, int* lda, int* ipiv, hipblasComplex* work, int* lwork, int* info); void zgetri_(int* n, hipblasDoubleComplex* A, int* lda, int* ipiv, hipblasDoubleComplex* work, int* lwork, int* info); void sgeqrf_(int* m, int* n, float* A, int* lda, float* tau, float* work, int* lwork, int* info); void dgeqrf_(int* m, int* n, double* A, int* lda, double* tau, double* work, int* lwork, int* info); void cgeqrf_(int* m, int* n, hipblasComplex* A, int* lda, hipblasComplex* tau, hipblasComplex* work, int* lwork, int* info); void zgeqrf_(int* m, int* n, hipblasDoubleComplex* A, int* lda, hipblasDoubleComplex* tau, hipblasDoubleComplex* work, int* lwork, int* info); void sgels_(char* trans, int* m, int* n, int* nrhs, float* A, int* lda, float* B, int* ldb, float* work, int* lwork, int* info); void dgels_(char* trans, int* m, int* n, int* nrhs, double* A, int* lda, double* B, int* ldb, double* work, int* lwork, int* info); void cgels_(char* trans, int* m, int* n, int* nrhs, hipblasComplex* A, int* lda, hipblasComplex* B, int* ldb, hipblasComplex* work, int* lwork, int* info); void zgels_(char* trans, int* m, int* n, int* nrhs, hipblasDoubleComplex* A, int* lda, hipblasDoubleComplex* B, int* ldb, hipblasDoubleComplex* work, int* lwork, int* info); void spotrf_(char* uplo, int* m, float* A, int* lda, int* info); void dpotrf_(char* uplo, int* m, double* A, int* lda, int* info); void cpotrf_(char* uplo, int* m, hipblasComplex* A, int* lda, int* info); void zpotrf_(char* uplo, int* m, hipblasDoubleComplex* A, int* lda, int* info); void cspr_( char* uplo, int* n, hipblasComplex* alpha, hipblasComplex* x, int* incx, hipblasComplex* A); void zspr_(char* uplo, int* n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int* incx, hipblasDoubleComplex* A); void csyr_(char* uplo, int* n, hipblasComplex* alpha, hipblasComplex* 
x, int* incx, hipblasComplex* a, int* lda); void zsyr_(char* uplo, int* n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int* incx, hipblasDoubleComplex* a, int* lda); void csymv_(char* uplo, int* n, hipblasComplex* alpha, hipblasComplex* A, int* lda, hipblasComplex* x, int* incx, hipblasComplex* beta, hipblasComplex* y, int* incy); void zsymv_(char* uplo, int* n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int* lda, hipblasDoubleComplex* x, int* incx, hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int* incy); #ifdef __cplusplus } #endif /* * =========================================================================== * level 1 BLAS * =========================================================================== */ // axpy template <> void cblas_axpy( int n, const hipblasHalf alpha, const hipblasHalf* x, int incx, hipblasHalf* y, int incy) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = half_to_float(x[i * abs_incx]); y_float[i * abs_incy] = half_to_float(y[i * abs_incy]); } cblas_saxpy(n, half_to_float(alpha), x_float.data(), incx, y_float.data(), incy); for(size_t i = 0; i < n; i++) { y[i * abs_incy] = float_to_half(y_float[i * abs_incy]); } } template <> void cblas_axpy( int n, const float alpha, const hipblasHalf* x, int incx, hipblasHalf* y, int incy) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = half_to_float(x[i * abs_incx]); y_float[i * abs_incy] = half_to_float(y[i * abs_incy]); } cblas_saxpy(n, alpha, x_float.data(), incx, y_float.data(), incy); for(size_t i = 0; i < n; i++) { y[i * abs_incy] = float_to_half(y_float[i * abs_incy]); } } template <> void cblas_axpy( int n, const float alpha, const float* x, int incx, float* y, int incy) { cblas_saxpy(n, alpha, x, incx, y, incy); } template <> void cblas_axpy( int n, const double alpha, const double* x, int incx, double* y, int incy) { cblas_daxpy(n, alpha, x, incx, y, incy); } template <> void cblas_axpy(int n, const hipblasComplex alpha, const hipblasComplex* x, int incx, hipblasComplex* y, int incy) { cblas_caxpy(n, &alpha, x, incx, y, incy); } template <> void cblas_axpy(int n, const hipblasDoubleComplex alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { cblas_zaxpy(n, &alpha, x, incx, y, incy); } // scal template <> void cblas_scal(int n, const hipblasHalf alpha, hipblasHalf* x, int incx) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = half_to_float(x[i * incx]); cblas_sscal(n, half_to_float(alpha), x_float.data(), incx); for(size_t i = 0; i < n; i++) x[i * incx] = float_to_half(x_float[i * incx]); } template <> void cblas_scal(int n, const float alpha, hipblasHalf* x, int incx) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = half_to_float(x[i * incx]); cblas_sscal(n, alpha, x_float.data(), incx); for(size_t i = 0; i < n; i++) x[i * incx] = float_to_half(x_float[i * incx]); } template <> void cblas_scal(int n, const float alpha, float* x, int incx) { cblas_sscal(n, alpha, x, incx); } template <> void cblas_scal(int n, const double alpha, double* x, int incx) { cblas_dscal(n, alpha, x, incx); } template <> void 
cblas_scal(int n, const hipblasComplex alpha, hipblasComplex* x, int incx) { cblas_cscal(n, &alpha, x, incx); } template <> void cblas_scal(int n, const float alpha, hipblasComplex* x, int incx) { cblas_csscal(n, alpha, x, incx); } template <> void cblas_scal(int n, const hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int incx) { cblas_zscal(n, &alpha, x, incx); } template <> void cblas_scal(int n, const double alpha, hipblasDoubleComplex* x, int incx) { cblas_zdscal(n, alpha, x, incx); } // copy template <> void cblas_copy(int n, float* x, int incx, float* y, int incy) { cblas_scopy(n, x, incx, y, incy); } template <> void cblas_copy(int n, double* x, int incx, double* y, int incy) { cblas_dcopy(n, x, incx, y, incy); } template <> void cblas_copy(int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy) { cblas_ccopy(n, x, incx, y, incy); } template <> void cblas_copy( int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { cblas_zcopy(n, x, incx, y, incy); } // swap template <> void cblas_swap(int n, float* x, int incx, float* y, int incy) { cblas_sswap(n, x, incx, y, incy); } template <> void cblas_swap(int n, double* x, int incx, double* y, int incy) { cblas_dswap(n, x, incx, y, incy); } template <> void cblas_swap(int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy) { cblas_cswap(n, x, incx, y, incy); } template <> void cblas_swap( int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { cblas_zswap(n, x, incx, y, incy); } // dot template <> void cblas_dot( int n, const hipblasHalf* x, int incx, const hipblasHalf* y, int incy, hipblasHalf* result) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = half_to_float(x[i * abs_incx]); y_float[i * abs_incy] = half_to_float(y[i * abs_incy]); } *result = float_to_half(cblas_sdot(n, x_float.data(), incx, y_float.data(), incy)); } template <> void cblas_dot(int n, const hipblasBfloat16* x, int incx, const hipblasBfloat16* y, int incy, hipblasBfloat16* result) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = bfloat16_to_float(x[i * abs_incx]); y_float[i * abs_incy] = bfloat16_to_float(y[i * abs_incy]); } *result = float_to_bfloat16(cblas_sdot(n, x_float.data(), incx, y_float.data(), incy)); } template <> void cblas_dot(int n, const float* x, int incx, const float* y, int incy, float* result) { *result = cblas_sdot(n, x, incx, y, incy); } template <> void cblas_dot(int n, const double* x, int incx, const double* y, int incy, double* result) { *result = cblas_ddot(n, x, incx, y, incy); } template <> void cblas_dot(int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result) { cblas_cdotu_sub(n, x, incx, y, incy, result); } template <> void cblas_dot(int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result) { cblas_zdotu_sub(n, x, incx, y, incy, result); } template <> void cblas_dotc( int n, const hipblasHalf* x, int incx, const hipblasHalf* y, int incy, hipblasHalf* result) { // Not complex - call regular dot. 
cblas_dot(n, x, incx, y, incy, result); } template <> void cblas_dotc(int n, const hipblasBfloat16* x, int incx, const hipblasBfloat16* y, int incy, hipblasBfloat16* result) { // Not complex - call regular dot. cblas_dot(n, x, incx, y, incy, result); } template <> void cblas_dotc(int n, const float* x, int incx, const float* y, int incy, float* result) { // Not complex - call regular dot. cblas_dot(n, x, incx, y, incy, result); } template <> void cblas_dotc(int n, const double* x, int incx, const double* y, int incy, double* result) { // Not complex - call regular dot. cblas_dot(n, x, incx, y, incy, result); } template <> void cblas_dotc(int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result) { cblas_cdotc_sub(n, x, incx, y, incy, result); } template <> void cblas_dotc(int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result) { cblas_zdotc_sub(n, x, incx, y, incy, result); } // nrm2 template <> void cblas_nrm2(int n, const hipblasHalf* x, int incx, hipblasHalf* result) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = half_to_float(x[i * incx]); *result = float_to_half(cblas_snrm2(n, x_float.data(), incx)); } template <> void cblas_nrm2(int n, const float* x, int incx, float* result) { *result = cblas_snrm2(n, x, incx); } template <> void cblas_nrm2(int n, const double* x, int incx, double* result) { *result = cblas_dnrm2(n, x, incx); } template <> void cblas_nrm2(int n, const hipblasComplex* x, int incx, float* result) { *result = cblas_scnrm2(n, x, incx); } template <> void cblas_nrm2(int n, const hipblasDoubleComplex* x, int incx, double* result) { *result = cblas_dznrm2(n, x, incx); } /////////////////// // rot functions // /////////////////// // LAPACK fortran library functionality extern "C" { void crot_(const int* n, hipblasComplex* cx, const int* incx, hipblasComplex* cy, const int* incy, const float* c, const hipblasComplex* s); void csrot_(const int* n, hipblasComplex* cx, const int* incx, hipblasComplex* cy, const int* incy, const float* c, const float* s); void zrot_(const int* n, hipblasDoubleComplex* cx, const int* incx, hipblasDoubleComplex* cy, const int* incy, const double* c, const hipblasDoubleComplex* s); void zdrot_(const int* n, hipblasDoubleComplex* cx, const int* incx, hipblasDoubleComplex* cy, const int* incy, const double* c, const double* s); void crotg_(hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s); void zrotg_(hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s); } // rot template <> void cblas_rot( int n, hipblasHalf* x, int incx, hipblasHalf* y, int incy, hipblasHalf c, hipblasHalf s) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? 
incy : -incy; size_t size_x = n * abs_incx; size_t size_y = n * abs_incy; if(!size_x) size_x = 1; if(!size_y) size_y = 1; std::vector x_float(size_x); std::vector y_float(size_y); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = half_to_float(x[i * abs_incx]); y_float[i * abs_incy] = half_to_float(y[i * abs_incy]); } const float c_float = half_to_float(c); const float s_float = half_to_float(s); cblas_srot(n, x_float.data(), incx, y_float.data(), incy, c_float, s_float); for(size_t i = 0; i < n; i++) { x[i * abs_incx] = float_to_half(x_float[i * abs_incx]); y[i * abs_incy] = float_to_half(y_float[i * abs_incy]); } } template <> void cblas_rot(int n, hipblasBfloat16* x, int incx, hipblasBfloat16* y, int incy, hipblasBfloat16 c, hipblasBfloat16 s) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; size_t size_x = n * abs_incx; size_t size_y = n * abs_incy; if(!size_x) size_x = 1; if(!size_y) size_y = 1; std::vector x_float(size_x); std::vector y_float(size_y); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = bfloat16_to_float(x[i * abs_incx]); y_float[i * abs_incy] = bfloat16_to_float(y[i * abs_incy]); } const float c_float = bfloat16_to_float(c); const float s_float = bfloat16_to_float(s); cblas_srot(n, x_float.data(), incx, y_float.data(), incy, c_float, s_float); for(size_t i = 0; i < n; i++) { x[i * abs_incx] = float_to_bfloat16(x_float[i * abs_incx]); y[i * abs_incy] = float_to_bfloat16(y_float[i * abs_incy]); } } template <> void cblas_rot(int n, float* x, int incx, float* y, int incy, float c, float s) { cblas_srot(n, x, incx, y, incy, c, s); } template <> void cblas_rot(int n, double* x, int incx, double* y, int incy, double c, double s) { cblas_drot(n, x, incx, y, incy, c, s); } template <> void cblas_rot(int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, hipblasComplex c, hipblasComplex s) { float c_real = std::real(c); crot_(&n, x, &incx, y, &incy, &c_real, &s); } template <> void cblas_rot( int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, float c, hipblasComplex s) { crot_(&n, x, &incx, y, &incy, &c, &s); } template <> void cblas_rot( int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, float c, float s) { csrot_(&n, x, &incx, y, &incy, &c, &s); } template <> void cblas_rot(int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, hipblasDoubleComplex c, hipblasDoubleComplex s) { double c_real = std::real(c); zrot_(&n, x, &incx, y, &incy, &c_real, &s); } template <> void cblas_rot(int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, double c, hipblasDoubleComplex s) { zrot_(&n, x, &incx, y, &incy, &c, &s); } template <> void cblas_rot( int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, double c, double s) { zdrot_(&n, x, &incx, y, &incy, &c, &s); } // rotg template <> void cblas_rotg(float* a, float* b, float* c, float* s) { cblas_srotg(a, b, c, s); } template <> void cblas_rotg(double* a, double* b, double* c, double* s) { cblas_drotg(a, b, c, s); } template <> void cblas_rotg(hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s) { crotg_(a, b, c, s); } template <> void cblas_rotg(hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s) { zrotg_(a, b, c, s); } // rotm template <> void cblas_rotm(int n, float* x, int incx, float* y, int incy, float* param) { cblas_srotm(n, x, incx, y, incy, param); } template <> void cblas_rotm(int n, double* x, int incx, double* y, int incy, 
double* param) { cblas_drotm(n, x, incx, y, incy, param); } // rotmg template <> void cblas_rotmg(float* d1, float* d2, float* x1, float* y1, float* param) { cblas_srotmg(d1, d2, x1, *y1, param); } template <> void cblas_rotmg(double* d1, double* d2, double* x1, double* y1, double* param) { cblas_drotmg(d1, d2, x1, *y1, param); } // asum template <> void cblas_asum(int n, const float* x, int incx, float* result) { *result = cblas_sasum(n, x, incx); } template <> void cblas_asum(int n, const double* x, int incx, double* result) { *result = cblas_dasum(n, x, incx); } template <> void cblas_asum(int n, const hipblasComplex* x, int incx, float* result) { *result = cblas_scasum(n, x, incx); } template <> void cblas_asum(int n, const hipblasDoubleComplex* x, int incx, double* result) { *result = cblas_dzasum(n, x, incx); } // amax template <> void cblas_iamax(int n, const float* x, int incx, int* result) { *result = (int)cblas_isamax(n, x, incx); } template <> void cblas_iamax(int n, const double* x, int incx, int* result) { *result = (int)cblas_idamax(n, x, incx); } template <> void cblas_iamax(int n, const hipblasComplex* x, int incx, int* result) { *result = (int)cblas_icamax(n, x, incx); } template <> void cblas_iamax(int n, const hipblasDoubleComplex* x, int incx, int* result) { *result = (int)cblas_izamax(n, x, incx); } // amin // amin is not implemented in cblas, make local version template double abs_helper(T val) { return val < 0 ? -val : val; } template <> double abs_helper(hipblasComplex val) { return std::abs(val.real()) + std::abs(val.imag()); } template <> double abs_helper(hipblasDoubleComplex val) { return std::abs(val.real()) + std::abs(val.imag()); } template int cblas_iamin_helper(int N, const T* X, int incx) { int minpos = -1; if(N > 0 && incx > 0) { auto min = abs_helper(X[0]); minpos = 0; for(size_t i = 1; i < N; ++i) { auto a = abs_helper(X[i * incx]); if(a < min) { min = a; minpos = i; } } } return minpos; } template <> void cblas_iamin(int n, const float* x, int incx, int* result) { *result = (int)cblas_iamin_helper(n, x, incx); } template <> void cblas_iamin(int n, const double* x, int incx, int* result) { *result = (int)cblas_iamin_helper(n, x, incx); } template <> void cblas_iamin(int n, const hipblasComplex* x, int incx, int* result) { *result = (int)cblas_iamin_helper(n, x, incx); } template <> void cblas_iamin(int n, const hipblasDoubleComplex* x, int incx, int* result) { *result = (int)cblas_iamin_helper(n, x, incx); } /* * =========================================================================== * level 2 BLAS * =========================================================================== */ // gbmv template <> void cblas_gbmv(hipblasOperation_t transA, int m, int n, int kl, int ku, float alpha, float* A, int lda, float* x, int incx, float beta, float* y, int incy) { cblas_sgbmv(CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> void cblas_gbmv(hipblasOperation_t transA, int m, int n, int kl, int ku, double alpha, double* A, int lda, double* x, int incx, double beta, double* y, int incy) { cblas_dgbmv(CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> void cblas_gbmv(hipblasOperation_t transA, int m, int n, int kl, int ku, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* x, int incx, hipblasComplex beta, hipblasComplex* y, int incy) { cblas_cgbmv(CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, kl, ku, &alpha, A, lda, x, incx, &beta, y, 
incy); } template <> void cblas_gbmv(hipblasOperation_t transA, int m, int n, int kl, int ku, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int incy) { cblas_zgbmv(CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, kl, ku, &alpha, A, lda, x, incx, &beta, y, incy); } // gemv template <> void cblas_gemv(hipblasOperation_t transA, int m, int n, float alpha, float* A, int lda, float* x, int incx, float beta, float* y, int incy) { cblas_sgemv( CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void cblas_gemv(hipblasOperation_t transA, int m, int n, double alpha, double* A, int lda, double* x, int incx, double beta, double* y, int incy) { cblas_dgemv( CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void cblas_gemv(hipblasOperation_t transA, int m, int n, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* x, int incx, hipblasComplex beta, hipblasComplex* y, int incy) { cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, &alpha, A, lda, x, incx, &beta, y, incy); } template <> void cblas_gemv(hipblasOperation_t transA, int m, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int incy) { cblas_zgemv( CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, &alpha, A, lda, x, incx, &beta, y, incy); } // ger template <> void cblas_ger( int m, int n, float alpha, float* x, int incx, float* y, int incy, float* A, int lda) { cblas_sger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda); } template <> void cblas_ger( int m, int n, double alpha, double* x, int incx, double* y, int incy, double* A, int lda) { cblas_dger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda); } template <> void cblas_ger(int m, int n, hipblasComplex alpha, hipblasComplex* x, int incx, hipblasComplex* y, int incy, hipblasComplex* A, int lda) { cblas_cgeru(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); } template <> void cblas_ger(int m, int n, hipblasComplex alpha, hipblasComplex* x, int incx, hipblasComplex* y, int incy, hipblasComplex* A, int lda) { cblas_cgerc(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); } template <> void cblas_ger(int m, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { cblas_zgeru(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); } template <> void cblas_ger(int m, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { cblas_zgerc(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); } // hbmv template <> void cblas_hbmv(hipblasFillMode_t uplo, int n, int k, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* x, int incx, hipblasComplex beta, hipblasComplex* y, int incy) { cblas_chbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, &alpha, A, lda, x, incx, &beta, y, incy); } template <> void cblas_hbmv(hipblasFillMode_t uplo, int n, int k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int incy) { cblas_zhbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, &alpha, A, lda, x, incx, &beta, y, incy); } // hemv template <> void cblas_hemv(hipblasFillMode_t uplo, int n, hipblasComplex alpha, 
hipblasComplex* A, int lda, hipblasComplex* x, int incx, hipblasComplex beta, hipblasComplex* y, int incy) { cblas_chemv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, A, lda, x, incx, &beta, y, incy); } template <> void cblas_hemv(hipblasFillMode_t uplo, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int incy) { cblas_zhemv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, A, lda, x, incx, &beta, y, incy); } // her template <> void cblas_her(hipblasFillMode_t uplo, int n, float alpha, hipblasComplex* x, int incx, hipblasComplex* A, int lda) { cblas_cher(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); } template <> void cblas_her(hipblasFillMode_t uplo, int n, double alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda) { cblas_zher(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); } // her2 template <> void cblas_her2(hipblasFillMode_t uplo, int n, hipblasComplex alpha, hipblasComplex* x, int incx, hipblasComplex* y, int incy, hipblasComplex* A, int lda) { cblas_cher2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, A, lda); } template <> void cblas_her2(hipblasFillMode_t uplo, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { cblas_zher2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, A, lda); } // hpmv template <> void cblas_hpmv(hipblasFillMode_t uplo, int n, hipblasComplex alpha, hipblasComplex* AP, hipblasComplex* x, int incx, hipblasComplex beta, hipblasComplex* y, int incy) { cblas_chpmv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, AP, x, incx, &beta, y, incy); } template <> void cblas_hpmv(hipblasFillMode_t uplo, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int incy) { cblas_zhpmv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, AP, x, incx, &beta, y, incy); } // hpr template <> void cblas_hpr( hipblasFillMode_t uplo, int n, float alpha, hipblasComplex* x, int incx, hipblasComplex* AP) { cblas_chpr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); } template <> void cblas_hpr(hipblasFillMode_t uplo, int n, double alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP) { cblas_zhpr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); } // hpr2 template <> void cblas_hpr2(hipblasFillMode_t uplo, int n, hipblasComplex alpha, hipblasComplex* x, int incx, hipblasComplex* y, int incy, hipblasComplex* AP) { cblas_chpr2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, AP); } template <> void cblas_hpr2(hipblasFillMode_t uplo, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP) { cblas_zhpr2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, AP); } // sbmv template <> void cblas_sbmv(hipblasFillMode_t uplo, int n, int k, float alpha, float* A, int lda, float* x, int incx, float beta, float* y, int incy) { cblas_ssbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } template <> void cblas_sbmv(hipblasFillMode_t uplo, int n, int k, double alpha, double* A, int lda, double* x, int incx, double beta, double* y, int incy) { cblas_dsbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } // spmv template <> void cblas_spmv(hipblasFillMode_t uplo, int n, 
float alpha, float* AP, float* x, int incx, float beta, float* y, int incy) { cblas_sspmv(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, AP, x, incx, beta, y, incy); } template <> void cblas_spmv(hipblasFillMode_t uplo, int n, double alpha, double* AP, double* x, int incx, double beta, double* y, int incy) { cblas_dspmv(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, AP, x, incx, beta, y, incy); } // spr template <> void cblas_spr(hipblasFillMode_t uplo, int n, float alpha, float* x, int incx, float* AP) { cblas_sspr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); } template <> void cblas_spr(hipblasFillMode_t uplo, int n, double alpha, double* x, int incx, double* AP) { cblas_dspr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); } template <> void cblas_spr(hipblasFillMode_t uplo, int n, hipblasComplex alpha, hipblasComplex* x, int incx, hipblasComplex* AP) { char u = uplo == HIPBLAS_FILL_MODE_UPPER ? 'U' : 'L'; cspr_(&u, &n, &alpha, x, &incx, AP); } template <> void cblas_spr(hipblasFillMode_t uplo, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP) { char u = uplo == HIPBLAS_FILL_MODE_UPPER ? 'U' : 'L'; zspr_(&u, &n, &alpha, x, &incx, AP); } // spr2 template <> void cblas_spr2( hipblasFillMode_t uplo, int n, float alpha, float* x, int incx, float* y, int incy, float* AP) { cblas_sspr2(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, y, incy, AP); } template <> void cblas_spr2(hipblasFillMode_t uplo, int n, double alpha, double* x, int incx, double* y, int incy, double* AP) { cblas_dspr2(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, y, incy, AP); } // symv template <> void cblas_symv(hipblasFillMode_t uplo, int n, float alpha, float* A, int lda, float* x, int incx, float beta, float* y, int incy) { cblas_ssymv(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void cblas_symv(hipblasFillMode_t uplo, int n, double alpha, double* A, int lda, double* x, int incx, double beta, double* y, int incy) { cblas_dsymv(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void cblas_symv(hipblasFillMode_t uplo, int n, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* x, int incx, hipblasComplex beta, hipblasComplex* y, int incy) { char u = uplo == HIPBLAS_FILL_MODE_UPPER ? 'U' : 'L'; csymv_(&u, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } template <> void cblas_symv(hipblasFillMode_t uplo, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int incy) { char u = uplo == HIPBLAS_FILL_MODE_UPPER ? 'U' : 'L'; zsymv_(&u, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } // syr template <> void cblas_syr( hipblasFillMode_t uplo, int n, float alpha, float* x, int incx, float* A, int lda) { cblas_ssyr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); } template <> void cblas_syr( hipblasFillMode_t uplo, int n, double alpha, double* x, int incx, double* A, int lda) { cblas_dsyr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); } template <> void cblas_syr(hipblasFillMode_t uplo, int n, hipblasComplex alpha, hipblasComplex* x, int incx, hipblasComplex* A, int lda) { char u = uplo == HIPBLAS_FILL_MODE_UPPER ? 
'U' : 'L'; csyr_(&u, &n, &alpha, x, &incx, A, &lda); } template <> void cblas_syr(hipblasFillMode_t uplo, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda) { char u = uplo == HIPBLAS_FILL_MODE_UPPER ? 'U' : 'L'; zsyr_(&u, &n, &alpha, x, &incx, A, &lda); } // syr2 // No complex version of syr2 - make a local implementation template void cblas_syr2_local( hipblasFillMode_t uplo, int n, T alpha, T* xa, int incx, T* ya, int incy, T* A, int lda) { if(n <= 0) return; T* x = incx < 0 ? xa - ptrdiff_t(incx) * (n - 1) : xa; T* y = incy < 0 ? ya - ptrdiff_t(incy) * (n - 1) : ya; if(uplo == HIPBLAS_FILL_MODE_UPPER) for(int j = 0; j < n; ++j) { T tmpx = alpha * x[j * incx]; T tmpy = alpha * y[j * incx]; for(int i = 0; i <= j; ++i) A[i + j * lda] += x[i * incx] * tmpy + y[i * incy] * tmpx; } else for(int j = 0; j < n; ++j) { T tmpx = alpha * x[j * incx]; T tmpy = alpha * y[j * incx]; for(int i = j; i < n; ++i) A[i + j * lda] += x[i * incx] * tmpy + y[i * incy] * tmpx; } } template <> void cblas_syr2(hipblasFillMode_t uplo, int n, float alpha, float* x, int incx, float* y, int incy, float* A, int lda) { cblas_ssyr2(CblasColMajor, CBLAS_UPLO(uplo), n, alpha, x, incx, y, incy, A, lda); } template <> void cblas_syr2(hipblasFillMode_t uplo, int n, double alpha, double* x, int incx, double* y, int incy, double* A, int lda) { cblas_dsyr2(CblasColMajor, CBLAS_UPLO(uplo), n, alpha, x, incx, y, incy, A, lda); } template <> void cblas_syr2(hipblasFillMode_t uplo, int n, hipblasComplex alpha, hipblasComplex* x, int incx, hipblasComplex* y, int incy, hipblasComplex* A, int lda) { cblas_syr2_local(uplo, n, alpha, x, incx, y, incy, A, lda); } template <> void cblas_syr2(hipblasFillMode_t uplo, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { cblas_syr2_local(uplo, n, alpha, x, incx, y, incy, A, lda); } // potrf template <> int cblas_potrf(char uplo, int m, float* A, int lda) { int info; spotrf_(&uplo, &m, A, &lda, &info); return info; } template <> int cblas_potrf(char uplo, int m, double* A, int lda) { int info; dpotrf_(&uplo, &m, A, &lda, &info); return info; } template <> int cblas_potrf(char uplo, int m, hipblasComplex* A, int lda) { int info; cpotrf_(&uplo, &m, A, &lda, &info); return info; } template <> int cblas_potrf(char uplo, int m, hipblasDoubleComplex* A, int lda) { int info; zpotrf_(&uplo, &m, A, &lda, &info); return info; } // tbmv template <> void cblas_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, float* x, int incx) { cblas_stbmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void cblas_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, double* x, int incx) { cblas_dtbmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void cblas_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { cblas_ctbmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void cblas_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { 
cblas_ztbmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } // tbsv template <> void cblas_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, float* x, int incx) { cblas_stbsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void cblas_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, double* x, int incx) { cblas_dtbsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void cblas_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { cblas_ctbsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void cblas_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { cblas_ztbsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } // tpmv template <> void cblas_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, float* x, int incx) { cblas_stpmv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); } template <> void cblas_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, double* x, int incx) { cblas_dtpmv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); } template <> void cblas_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, hipblasComplex* x, int incx) { cblas_ctpmv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); } template <> void cblas_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, hipblasDoubleComplex* x, int incx) { cblas_ztpmv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); } // tpsv template <> void cblas_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, const float* AP, float* x, int incx) { cblas_stpsv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); } template <> void cblas_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, const double* AP, double* x, int incx) { cblas_dtpsv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); } template <> void cblas_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, const hipblasComplex* AP, hipblasComplex* x, int incx) { cblas_ctpsv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); } template <> void cblas_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx) { cblas_ztpsv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); } // trmv template <> void cblas_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t 
diag, int m, const float* A, int lda, float* x, int incx) { cblas_strmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void cblas_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, double* x, int incx) { cblas_dtrmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void cblas_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { cblas_ctrmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void cblas_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { cblas_ztrmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } // trsv template <> void cblas_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, float* x, int incx) { cblas_strsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void cblas_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, double* x, int incx) { cblas_dtrsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void cblas_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { cblas_ctrsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void cblas_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { cblas_ztrsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } /* * =========================================================================== * level 3 BLAS * =========================================================================== */ template void cblas_geam_helper(hipblasOperation_t transA, hipblasOperation_t transB, int M, int N, T alpha, T* A, int lda, T beta, T* B, int ldb, T* C, int ldc) { int inc1_A = transA == HIPBLAS_OP_N ? 1 : lda; int inc2_A = transA == HIPBLAS_OP_N ? lda : 1; int inc1_B = transB == HIPBLAS_OP_N ? 1 : ldb; int inc2_B = transB == HIPBLAS_OP_N ? 
ldb : 1; for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { T a_val = A[i * inc1_A + j * inc2_A]; T b_val = B[i * inc1_B + j * inc2_B]; if(transA == HIPBLAS_OP_C) a_val = std::conj(a_val); if(transB == HIPBLAS_OP_C) b_val = std::conj(b_val); C[i + j * ldc] = alpha * a_val + beta * b_val; } } } // geam template <> void cblas_geam(hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, float* alpha, float* A, int lda, float* beta, float* B, int ldb, float* C, int ldc) { return cblas_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); } template <> void cblas_geam(hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, double* alpha, double* A, int lda, double* beta, double* B, int ldb, double* C, int ldc) { return cblas_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); } template <> void cblas_geam(hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, hipblasComplex* alpha, hipblasComplex* A, int lda, hipblasComplex* beta, hipblasComplex* B, int ldb, hipblasComplex* C, int ldc) { return cblas_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); } template <> void cblas_geam(hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* beta, hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* C, int ldc) { return cblas_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); } // gemm template <> void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, hipblasHalf alpha, hipblasHalf* A, int lda, hipblasHalf* B, int ldb, hipblasHalf beta, hipblasHalf* C, int ldc) { // cblas does not support hipblasHalf, so convert to higher precision float // This will give more precise result which is acceptable for testing float alpha_float = half_to_float(alpha); float beta_float = half_to_float(beta); int sizeA = transA == HIPBLAS_OP_N ? k * lda : m * lda; int sizeB = transB == HIPBLAS_OP_N ? n * ldb : k * ldb; int sizeC = n * ldc; std::unique_ptr A_float(new float[sizeA]()); std::unique_ptr B_float(new float[sizeB]()); std::unique_ptr C_float(new float[sizeC]()); for(int i = 0; i < sizeA; i++) { A_float[i] = half_to_float(A[i]); } for(int i = 0; i < sizeB; i++) { B_float[i] = half_to_float(B[i]); } for(int i = 0; i < sizeC; i++) { C_float[i] = half_to_float(C[i]); } // just directly cast, since transA, transB are integers in the enum // printf("transA: rocblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha_float, const_cast(A_float.get()), lda, const_cast(B_float.get()), ldb, beta_float, static_cast(C_float.get()), ldc); for(int i = 0; i < sizeC; i++) { C[i] = float_to_half(C_float[i]); } } template <> void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, float alpha_float, hipblasHalf* A, int lda, hipblasHalf* B, int ldb, float beta_float, hipblasHalf* C, int ldc) { // cblas does not support hipblasHalf, so convert to higher precision float // This will give more precise result which is acceptable for testing int sizeA = transA == HIPBLAS_OP_N ? k * lda : m * lda; int sizeB = transB == HIPBLAS_OP_N ? 
n * ldb : k * ldb; int sizeC = n * ldc; std::unique_ptr A_float(new float[sizeA]()); std::unique_ptr B_float(new float[sizeB]()); std::unique_ptr C_float(new float[sizeC]()); for(int i = 0; i < sizeA; i++) { A_float[i] = half_to_float(A[i]); } for(int i = 0; i < sizeB; i++) { B_float[i] = half_to_float(B[i]); } for(int i = 0; i < sizeC; i++) { C_float[i] = half_to_float(C[i]); } // just directly cast, since transA, transB are integers in the enum // printf("transA: rocblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha_float, const_cast(A_float.get()), lda, const_cast(B_float.get()), ldb, beta_float, static_cast(C_float.get()), ldc); for(int i = 0; i < sizeC; i++) { C[i] = float_to_half(C_float[i]); } } template <> void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, float alpha_float, hipblasBfloat16* A, int lda, hipblasBfloat16* B, int ldb, float beta_float, hipblasBfloat16* C, int ldc) { // cblas does not support hipblasBfloat16, so convert to higher precision float // This will give more precise result which is acceptable for testing int sizeA = transA == HIPBLAS_OP_N ? k * lda : m * lda; int sizeB = transB == HIPBLAS_OP_N ? n * ldb : k * ldb; int sizeC = n * ldc; std::unique_ptr A_float(new float[sizeA]()); std::unique_ptr B_float(new float[sizeB]()); std::unique_ptr C_float(new float[sizeC]()); for(int i = 0; i < sizeA; i++) { A_float[i] = bfloat16_to_float(A[i]); } for(int i = 0; i < sizeB; i++) { B_float[i] = bfloat16_to_float(B[i]); } for(int i = 0; i < sizeC; i++) { C_float[i] = bfloat16_to_float(C[i]); } // just directly cast, since transA, transB are integers in the enum // printf("transA: rocblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha_float, const_cast(A_float.get()), lda, const_cast(B_float.get()), ldb, beta_float, static_cast(C_float.get()), ldc); for(int i = 0; i < sizeC; i++) { C[i] = float_to_bfloat16(C_float[i]); } } template <> void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, float alpha, float* A, int lda, float* B, int ldb, float beta, float* C, int ldc) { // just directly cast, since transA, transB are integers in the enum // printf("transA: hipblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, double alpha, double* A, int lda, double* B, int ldb, double beta, double* C, int ldc) { cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb, hipblasComplex beta, hipblasComplex* C, int ldc) { //just directly cast, since transA, transB are integers in the enum cblas_cgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex beta, 
hipblasDoubleComplex* C, int ldc) { cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, int32_t alpha, int8_t* A, int lda, int8_t* B, int ldb, int32_t beta, int32_t* C, int ldc) { double alpha_double = static_cast(alpha); double beta_double = static_cast(beta); size_t const sizeA = ((transA == HIPBLAS_OP_N) ? k : m) * size_t(lda); size_t const sizeB = ((transB == HIPBLAS_OP_N) ? n : k) * size_t(ldb); size_t const sizeC = n * size_t(ldc); std::unique_ptr A_double(new double[sizeA]()); std::unique_ptr B_double(new double[sizeB]()); std::unique_ptr C_double(new double[sizeC]()); for(int i = 0; i < sizeA; i++) { A_double[i] = static_cast(A[i]); } for(int i = 0; i < sizeB; i++) { B_double[i] = static_cast(B[i]); } for(int i = 0; i < sizeC; i++) { C_double[i] = static_cast(C[i]); } cblas_dgemm(CblasColMajor, static_cast(transA), static_cast(transB), m, n, k, alpha_double, const_cast(A_double.get()), lda, const_cast(B_double.get()), ldb, beta_double, static_cast(C_double.get()), ldc); for(size_t i = 0; i < sizeC; i++) C[i] = static_cast(C_double[i]); } // hemm template <> void cblas_hemm(hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb, hipblasComplex beta, hipblasComplex* C, int ldc) { cblas_chemm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void cblas_hemm(hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex beta, hipblasDoubleComplex* C, int ldc) { cblas_zhemm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, &alpha, A, lda, B, ldb, &beta, C, ldc); } // herk template <> void cblas_herk(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, float alpha, hipblasComplex* A, int lda, float beta, hipblasComplex* C, int ldc) { cblas_cherk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, beta, C, ldc); } template <> void cblas_herk(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, double alpha, hipblasDoubleComplex* A, int lda, double beta, hipblasDoubleComplex* C, int ldc) { cblas_zherk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, beta, C, ldc); } // herkx template void cblas_herkx_local(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, T alpha, T* A, int lda, T* B, int ldb, U beta, T* C, int ldc) { if(n <= 0 || (beta == 1 && (k == 0 || alpha == T(0.0)))) return; if(transA == HIPBLAS_OP_N) { if(uplo == HIPBLAS_FILL_MODE_UPPER) { for(int j = 0; j < n; ++j) { for(int i = 0; i <= j; i++) C[i + j * ldc] *= T(beta); for(int l = 0; l < k; l++) { T temp = alpha * std::conj(B[j + l * ldb]); for(int i = 0; i <= j; ++i) C[i + j * ldc] += temp * A[i + l * lda]; } } } else // lower { for(int j = 0; j < n; ++j) { for(int i = j; i < n; i++) C[i + j * ldc] *= T(beta); for(int l = 0; l < k; l++) { T temp = alpha * std::conj(B[j + l * ldb]); for(int i = j; i < n; ++i) C[i + j * ldc] += temp * A[i + l * lda]; } } } } else // conjugate transpose { if(uplo == HIPBLAS_FILL_MODE_UPPER) { for(int j = 0; j < n; ++j) for(int i = 0; i <= j; i++) { C[i + j * ldc] *= T(beta); T temp(0); for(int l = 0; l < k; l++) temp += std::conj(A[l + i * lda]) * B[l + j * ldb]; 
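                    // temp now holds (A^H * B)(i, j); C(i, j) was already scaled by beta
                    // above, so the herkx update for this entry completes below.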
C[i + j * ldc] += alpha * temp; } } else // lower { for(int j = 0; j < n; ++j) for(int i = j; i < n; i++) { C[i + j * ldc] *= T(beta); T temp(0); for(int l = 0; l < k; l++) temp += std::conj(A[l + i * lda]) * B[l + j * ldb]; C[i + j * ldc] += alpha * temp; } } } for(int i = 0; i < n; i++) C[i + i * ldc].imag(0); } template <> void cblas_herkx(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb, float beta, hipblasComplex* C, int ldc) { cblas_herkx_local(uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_herkx(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb, double beta, hipblasDoubleComplex* C, int ldc) { cblas_herkx_local(uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // her2k template <> void cblas_her2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb, float beta, hipblasComplex* C, int ldc) { cblas_cher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_her2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb, double beta, hipblasDoubleComplex* C, int ldc) { cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, B, ldb, beta, C, ldc); } // symm template <> void cblas_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, float alpha, float* A, int lda, float* B, int ldb, float beta, float* C, int ldc) { cblas_ssymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, double alpha, double* A, int lda, double* B, int ldb, double beta, double* C, int ldc) { cblas_dsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb, hipblasComplex beta, hipblasComplex* C, int ldc) { cblas_csymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void cblas_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex beta, hipblasDoubleComplex* C, int ldc) { cblas_zsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, &alpha, A, lda, B, ldb, &beta, C, ldc); } // syrk template <> void cblas_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, float alpha, float* A, int lda, float beta, float* C, int ldc) { cblas_ssyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, beta, C, ldc); } template <> void cblas_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, double alpha, double* A, int lda, double beta, double* C, int ldc) { cblas_dsyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, beta, C, ldc); } template <> void cblas_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, hipblasComplex alpha, 
hipblasComplex* A, int lda, hipblasComplex beta, hipblasComplex* C, int ldc) { cblas_csyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, &beta, C, ldc); } template <> void cblas_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex beta, hipblasDoubleComplex* C, int ldc) { cblas_zsyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, &beta, C, ldc); } // syr2k template <> void cblas_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, float alpha, float* A, int lda, float* B, int ldb, float beta, float* C, int ldc) { cblas_ssyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, double alpha, double* A, int lda, double* B, int ldb, double beta, double* C, int ldc) { cblas_dsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, hipblasComplex alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb, hipblasComplex beta, hipblasComplex* C, int ldc) { cblas_csyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void cblas_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex beta, hipblasDoubleComplex* C, int ldc) { cblas_zsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } // syrkx // Use syrk with A == B for now. 
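// Illustrative sketch (not part of the original source): CBLAS has no syrkx, so a
// host-side reference can be written in the same style as cblas_herkx_local above,
// computing C := alpha * op(A) * op(B)^T + beta * C on the triangle selected by uplo.
// The helper name cblas_syrkx_local_sketch and its exact signature are hypothetical;
// the test suite's real reference lives in syrkx_reference.hpp. transA is assumed to
// be HIPBLAS_OP_N or HIPBLAS_OP_T (anything else is treated as transpose here).
template <typename T>
void cblas_syrkx_local_sketch(hipblasFillMode_t  uplo,
                              hipblasOperation_t transA,
                              int                n,
                              int                k,
                              T                  alpha,
                              const T*           A,
                              int                lda,
                              const T*           B,
                              int                ldb,
                              T                  beta,
                              T*                 C,
                              int                ldc)
{
    if(n <= 0)
        return;
    // Column-major storage: op(A)(i, l) and op(B)(j, l) are read directly for
    // HIPBLAS_OP_N, and through the transposed index otherwise.
    for(int j = 0; j < n; ++j)
    {
        int i_begin = (uplo == HIPBLAS_FILL_MODE_UPPER) ? 0 : j;
        int i_end   = (uplo == HIPBLAS_FILL_MODE_UPPER) ? j + 1 : n;
        for(int i = i_begin; i < i_end; ++i)
        {
            T sum(0);
            for(int l = 0; l < k; ++l)
            {
                T a_il = (transA == HIPBLAS_OP_N) ? A[i + l * lda] : A[l + i * lda];
                T b_jl = (transA == HIPBLAS_OP_N) ? B[j + l * ldb] : B[l + j * ldb];
                sum += a_il * b_jl;
            }
            C[i + j * ldc] = alpha * sum + beta * C[i + j * ldc];
        }
    }
}
// When A == B this reduces to syrk, which matches the note above about using syrk as a
// stand-in; a dedicated reference only matters when A and B differ.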
// trsm template <> void cblas_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, float alpha, const float* A, int lda, float* B, int ldb) { // just directly cast, since transA, transB are integers in the enum cblas_strsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, alpha, A, lda, B, ldb); } template <> void cblas_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, double alpha, const double* A, int lda, double* B, int ldb) { // just directly cast, since transA, transB are integers in the enum cblas_dtrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, alpha, A, lda, B, ldb); } template <> void cblas_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, hipblasComplex alpha, const hipblasComplex* A, int lda, hipblasComplex* B, int ldb) { cblas_ctrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, &alpha, A, lda, B, ldb); } template <> void cblas_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, hipblasDoubleComplex alpha, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb) { cblas_ztrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, &alpha, A, lda, B, ldb); } // trtri template <> int cblas_trtri(char uplo, char diag, int n, float* A, int lda) { // just directly cast, since transA, transB are integers in the enum // printf("transA: hipblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); int info; strtri_(&uplo, &diag, &n, A, &lda, &info); return info; } template <> int cblas_trtri(char uplo, char diag, int n, double* A, int lda) { // just directly cast, since transA, transB are integers in the enum // printf("transA: hipblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); int info; dtrtri_(&uplo, &diag, &n, A, &lda, &info); return info; } template <> int cblas_trtri(char uplo, char diag, int n, hipblasComplex* A, int lda) { // just directly cast, since transA, transB are integers in the enum // printf("transA: hipblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); int info; ctrtri_(&uplo, &diag, &n, A, &lda, &info); return info; } template <> int cblas_trtri(char uplo, char diag, int n, hipblasDoubleComplex* A, int lda) { // just directly cast, since transA, transB are integers in the enum // printf("transA: hipblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); int info; ztrtri_(&uplo, &diag, &n, A, &lda, &info); return info; } // trmm template <> void cblas_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, float alpha, const float* A, int lda, float* B, int ldb) { // just directly cast, since transA, transB are integers in the enum cblas_strmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, alpha, A, lda, B, ldb); } template <> void cblas_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, double alpha, const double* A, int lda, double* B, int ldb) { // just directly cast, since transA, transB are integers in the enum cblas_dtrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, 
(CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, alpha, A, lda, B, ldb); } template <> void cblas_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, hipblasComplex alpha, const hipblasComplex* A, int lda, hipblasComplex* B, int ldb) { cblas_ctrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, &alpha, A, lda, B, ldb); } template <> void cblas_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, hipblasDoubleComplex alpha, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb) { cblas_ztrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, &alpha, A, lda, B, ldb); } // getrf template <> int cblas_getrf(int m, int n, float* A, int lda, int* ipiv) { int info; sgetrf_(&m, &n, A, &lda, ipiv, &info); return info; } template <> int cblas_getrf(int m, int n, double* A, int lda, int* ipiv) { int info; dgetrf_(&m, &n, A, &lda, ipiv, &info); return info; } template <> int cblas_getrf(int m, int n, hipblasComplex* A, int lda, int* ipiv) { int info; cgetrf_(&m, &n, A, &lda, ipiv, &info); return info; } template <> int cblas_getrf(int m, int n, hipblasDoubleComplex* A, int lda, int* ipiv) { int info; zgetrf_(&m, &n, A, &lda, ipiv, &info); return info; } // getrs template <> int cblas_getrs(char trans, int n, int nrhs, float* A, int lda, int* ipiv, float* B, int ldb) { int info; sgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); return info; } template <> int cblas_getrs( char trans, int n, int nrhs, double* A, int lda, int* ipiv, double* B, int ldb) { int info; dgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); return info; } template <> int cblas_getrs( char trans, int n, int nrhs, hipblasComplex* A, int lda, int* ipiv, hipblasComplex* B, int ldb) { int info; cgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); return info; } template <> int cblas_getrs(char trans, int n, int nrhs, hipblasDoubleComplex* A, int lda, int* ipiv, hipblasDoubleComplex* B, int ldb) { int info; zgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); return info; } // getri template <> int cblas_getri(int n, float* A, int lda, int* ipiv, float* work, int lwork) { int info; sgetri_(&n, A, &lda, ipiv, work, &lwork, &info); return info; } template <> int cblas_getri(int n, double* A, int lda, int* ipiv, double* work, int lwork) { int info; dgetri_(&n, A, &lda, ipiv, work, &lwork, &info); return info; } template <> int cblas_getri( int n, hipblasComplex* A, int lda, int* ipiv, hipblasComplex* work, int lwork) { int info; cgetri_(&n, A, &lda, ipiv, work, &lwork, &info); return info; } template <> int cblas_getri( int n, hipblasDoubleComplex* A, int lda, int* ipiv, hipblasDoubleComplex* work, int lwork) { int info; zgetri_(&n, A, &lda, ipiv, work, &lwork, &info); return info; } // geqrf template <> int cblas_geqrf(int m, int n, float* A, int lda, float* tau, float* work, int lwork) { int info; sgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); return info; } template <> int cblas_geqrf(int m, int n, double* A, int lda, double* tau, double* work, int lwork) { int info; dgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); return info; } template <> int cblas_geqrf( int m, int n, hipblasComplex* A, int lda, hipblasComplex* tau, hipblasComplex* work, int lwork) { int info; cgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); return info; } template <> int 
cblas_geqrf(int m, int n, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* tau, hipblasDoubleComplex* work, int lwork) { int info; zgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); return info; } // gels template <> int cblas_gels(char trans, int m, int n, int nrhs, float* A, int lda, float* B, int ldb, float* work, int lwork) { int info; sgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info); return info; } template <> int cblas_gels(char trans, int m, int n, int nrhs, double* A, int lda, double* B, int ldb, double* work, int lwork) { int info; dgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info); return info; } template <> int cblas_gels(char trans, int m, int n, int nrhs, hipblasComplex* A, int lda, hipblasComplex* B, int ldb, hipblasComplex* work, int lwork) { int info; cgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info); return info; } template <> int cblas_gels(char trans, int m, int n, int nrhs, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* work, int lwork) { int info; zgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info); return info; } hipBLAS-rocm-5.5.1/clients/common/clients_common.cpp000066400000000000000000001644361434647641600224050ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "hipblas.hpp" #include "argument_model.hpp" #include "hipblas_data.hpp" #include "hipblas_datatype2string.hpp" #include "hipblas_parse_data.hpp" #include "hipblas_test.hpp" #include "test_cleanup.hpp" #include "type_dispatch.hpp" #include "utility.h" #include #include #include #include #include #include #include #include #include #include // aux #include "testing_set_get_matrix.hpp" #include "testing_set_get_matrix_async.hpp" #include "testing_set_get_vector.hpp" #include "testing_set_get_vector_async.hpp" // blas1 #include "testing_asum.hpp" #include "testing_asum_batched.hpp" #include "testing_asum_strided_batched.hpp" #include "testing_axpy.hpp" #include "testing_axpy_batched.hpp" #include "testing_axpy_batched_ex.hpp" #include "testing_axpy_ex.hpp" #include "testing_axpy_strided_batched.hpp" #include "testing_axpy_strided_batched_ex.hpp" #include "testing_copy.hpp" #include "testing_copy_batched.hpp" #include "testing_copy_strided_batched.hpp" #include "testing_dot.hpp" #include "testing_dot_batched.hpp" #include "testing_dot_batched_ex.hpp" #include "testing_dot_ex.hpp" #include "testing_dot_strided_batched.hpp" #include "testing_dot_strided_batched_ex.hpp" #include "testing_iamax_iamin.hpp" #include "testing_iamax_iamin_batched.hpp" #include "testing_iamax_iamin_strided_batched.hpp" #include "testing_nrm2.hpp" #include "testing_nrm2_batched.hpp" #include "testing_nrm2_batched_ex.hpp" #include "testing_nrm2_ex.hpp" #include "testing_nrm2_strided_batched.hpp" #include "testing_nrm2_strided_batched_ex.hpp" #include "testing_rot.hpp" #include "testing_rot_batched.hpp" #include "testing_rot_batched_ex.hpp" #include "testing_rot_ex.hpp" #include "testing_rot_strided_batched.hpp" #include "testing_rot_strided_batched_ex.hpp" #include "testing_rotg.hpp" #include "testing_rotg_batched.hpp" #include "testing_rotg_strided_batched.hpp" #include "testing_rotm.hpp" #include "testing_rotm_batched.hpp" #include "testing_rotm_strided_batched.hpp" #include "testing_rotmg.hpp" #include "testing_rotmg_batched.hpp" #include "testing_rotmg_strided_batched.hpp" #include "testing_scal.hpp" #include "testing_scal_batched.hpp" #include "testing_scal_batched_ex.hpp" #include "testing_scal_ex.hpp" #include "testing_scal_strided_batched.hpp" #include "testing_scal_strided_batched_ex.hpp" #include "testing_swap.hpp" #include "testing_swap_batched.hpp" #include "testing_swap_strided_batched.hpp" // blas2 #include "testing_gbmv.hpp" #include "testing_gbmv_batched.hpp" #include "testing_gbmv_strided_batched.hpp" #include "testing_gemv.hpp" #include "testing_gemv_batched.hpp" #include "testing_gemv_strided_batched.hpp" #include "testing_ger.hpp" #include "testing_ger_batched.hpp" #include "testing_ger_strided_batched.hpp" #include "testing_hbmv.hpp" #include "testing_hbmv_batched.hpp" #include "testing_hbmv_strided_batched.hpp" #include "testing_hemv.hpp" #include "testing_hemv_batched.hpp" #include "testing_hemv_strided_batched.hpp" #include "testing_her.hpp" #include "testing_her2.hpp" #include "testing_her2_batched.hpp" #include "testing_her2_strided_batched.hpp" #include "testing_her_batched.hpp" #include "testing_her_strided_batched.hpp" #include "testing_hpmv.hpp" #include "testing_hpmv_batched.hpp" #include "testing_hpmv_strided_batched.hpp" #include "testing_hpr.hpp" #include "testing_hpr2.hpp" #include "testing_hpr2_batched.hpp" #include "testing_hpr2_strided_batched.hpp" #include "testing_hpr_batched.hpp" #include 
"testing_hpr_strided_batched.hpp" #include "testing_sbmv.hpp" #include "testing_sbmv_batched.hpp" #include "testing_sbmv_strided_batched.hpp" #include "testing_spmv.hpp" #include "testing_spmv_batched.hpp" #include "testing_spmv_strided_batched.hpp" #include "testing_spr.hpp" #include "testing_spr2.hpp" #include "testing_spr2_batched.hpp" #include "testing_spr2_strided_batched.hpp" #include "testing_spr_batched.hpp" #include "testing_spr_strided_batched.hpp" #include "testing_symv.hpp" #include "testing_symv_batched.hpp" #include "testing_symv_strided_batched.hpp" #include "testing_syr.hpp" #include "testing_syr2.hpp" #include "testing_syr2_batched.hpp" #include "testing_syr2_strided_batched.hpp" #include "testing_syr_batched.hpp" #include "testing_syr_strided_batched.hpp" #include "testing_tbmv.hpp" #include "testing_tbmv_batched.hpp" #include "testing_tbmv_strided_batched.hpp" #include "testing_tbsv.hpp" #include "testing_tbsv_batched.hpp" #include "testing_tbsv_strided_batched.hpp" #include "testing_tpmv.hpp" #include "testing_tpmv_batched.hpp" #include "testing_tpmv_strided_batched.hpp" #include "testing_tpsv.hpp" #include "testing_tpsv_batched.hpp" #include "testing_tpsv_strided_batched.hpp" #include "testing_trmv.hpp" #include "testing_trmv_batched.hpp" #include "testing_trmv_strided_batched.hpp" // blas3 #include "syrkx_reference.hpp" #include "testing_dgmm.hpp" #include "testing_dgmm_batched.hpp" #include "testing_dgmm_strided_batched.hpp" #include "testing_geam.hpp" #include "testing_geam_batched.hpp" #include "testing_geam_strided_batched.hpp" #include "testing_gemm.hpp" #include "testing_gemm_batched.hpp" #include "testing_gemm_batched_ex.hpp" #include "testing_gemm_ex.hpp" #include "testing_gemm_strided_batched.hpp" #include "testing_gemm_strided_batched_ex.hpp" #include "testing_hemm.hpp" #include "testing_hemm_batched.hpp" #include "testing_hemm_strided_batched.hpp" #include "testing_her2k.hpp" #include "testing_her2k_batched.hpp" #include "testing_her2k_strided_batched.hpp" #include "testing_herk.hpp" #include "testing_herk_batched.hpp" #include "testing_herk_strided_batched.hpp" #include "testing_herkx.hpp" #include "testing_herkx_batched.hpp" #include "testing_herkx_strided_batched.hpp" #include "testing_symm.hpp" #include "testing_symm_batched.hpp" #include "testing_symm_strided_batched.hpp" #include "testing_syr2k.hpp" #include "testing_syr2k_batched.hpp" #include "testing_syr2k_strided_batched.hpp" #include "testing_syrk.hpp" #include "testing_syrk_batched.hpp" #include "testing_syrk_strided_batched.hpp" #include "testing_syrkx.hpp" #include "testing_syrkx_batched.hpp" #include "testing_syrkx_strided_batched.hpp" #include "testing_trmm.hpp" #include "testing_trmm_batched.hpp" #include "testing_trmm_strided_batched.hpp" #include "testing_trsm.hpp" #include "testing_trsm_batched.hpp" #include "testing_trsm_batched_ex.hpp" #include "testing_trsm_ex.hpp" #include "testing_trsm_strided_batched.hpp" #include "testing_trsm_strided_batched_ex.hpp" #include "testing_trsv.hpp" #include "testing_trsv_batched.hpp" #include "testing_trsv_strided_batched.hpp" #include "testing_trtri.hpp" #include "testing_trtri_batched.hpp" #include "testing_trtri_strided_batched.hpp" // solver functions #ifdef __HIP_PLATFORM_SOLVER__ #include "testing_gels.hpp" #include "testing_gels_batched.hpp" #include "testing_gels_strided_batched.hpp" #include "testing_geqrf.hpp" #include "testing_geqrf_batched.hpp" #include "testing_geqrf_strided_batched.hpp" #include "testing_getrf.hpp" #include 
"testing_getrf_batched.hpp" #include "testing_getrf_npvt.hpp" #include "testing_getrf_npvt_batched.hpp" #include "testing_getrf_npvt_strided_batched.hpp" #include "testing_getrf_strided_batched.hpp" #include "testing_getri_batched.hpp" #include "testing_getri_npvt_batched.hpp" #include "testing_getrs.hpp" #include "testing_getrs_batched.hpp" #include "testing_getrs_strided_batched.hpp" #endif #include "utility.h" #include #undef I //using namespace roc; // For emulated program_options using namespace std::literals; // For std::string literals of form "str"s typedef int hipblas_int; struct str_less { bool operator()(const char* a, const char* b) const { return strcmp(a, b) < 0; } }; // Map from const char* to function taking const Arguments& using comparison above using func_map = std::map; // Run a function by using map to map arg.function to function void run_function(const func_map& map, const Arguments& arg, const std::string& msg = "") { auto match = map.find(arg.function); if(match == map.end()) throw std::invalid_argument("Invalid combination --function "s + arg.function + " --a_type "s + hipblas_datatype2string(arg.a_type) + msg); match->second(arg); } void get_test_name(const Arguments& arg, std::string& name) { // Map from const char* to function taking const Arguments& using comparison above using name_to_f_testname_map = std::map; static const name_to_f_testname_map fmap = { // L1 {"asum", testname_asum}, {"asum_batched", testname_asum_batched}, {"asum_strided_batched", testname_asum_strided_batched}, {"axpy", testname_axpy}, {"axpy_batched", testname_axpy_batched}, {"axpy_strided_batched", testname_axpy_strided_batched}, {"axpy_ex", testname_axpy_ex}, {"axpy_batched_ex", testname_axpy_batched_ex}, {"axpy_strided_batched_ex", testname_axpy_strided_batched_ex}, {"copy", testname_copy}, {"copy_batched", testname_copy_batched}, {"copy_strided_batched", testname_copy_strided_batched}, {"dot", testname_dot}, {"dot_batched", testname_dot_batched}, {"dot_strided_batched", testname_dot_strided_batched}, {"dotc", testname_dotc}, {"dotc_batched", testname_dotc_batched}, {"dotc_strided_batched", testname_dotc_strided_batched}, {"iamax", testname_amax}, {"iamax_batched", testname_amax_batched}, {"iamax_strided_batched", testname_amax_strided_batched}, {"iamin", testname_amin}, {"iamin_batched", testname_amin_batched}, {"iamin_strided_batched", testname_amin_strided_batched}, {"nrm2", testname_nrm2}, {"nrm2_batched", testname_nrm2_batched}, {"nrm2_strided_batched", testname_nrm2_strided_batched}, {"nrm2_ex", testname_nrm2_ex}, {"nrm2_batched_ex", testname_nrm2_batched_ex}, {"nrm2_strided_batched_ex", testname_nrm2_strided_batched_ex}, {"rot", testname_rot}, {"rot_batched", testname_rot_batched}, {"rot_strided_batched", testname_rot_strided_batched}, {"rot_ex", testname_rot_ex}, {"rot_batched_ex", testname_rot_batched_ex}, {"rot_strided_batched_ex", testname_rot_strided_batched_ex}, {"rotg", testname_rotg}, {"rotg_batched", testname_rotg_batched}, {"rotg_strided_batched", testname_rotg_strided_batched}, {"rotm", testname_rotm}, {"rotm_batched", testname_rotm_batched}, {"rotm_strided_batched", testname_rotm_strided_batched}, {"rotmg", testname_rotmg}, {"rotmg_batched", testname_rotmg_batched}, {"rotmg_strided_batched", testname_rotmg_strided_batched}, {"swap", testname_swap}, {"swap_batched", testname_swap_batched}, {"swap_strided_batched", testname_swap_strided_batched}, {"scal", testname_scal}, {"scal_batched", testname_scal_batched}, {"scal_strided_batched", testname_scal_strided_batched}, 
{"scal_ex", testname_scal_ex}, {"scal_batched_ex", testname_scal_batched_ex}, {"scal_strided_batched_ex", testname_scal_strided_batched_ex}, // L2 {"gbmv", testname_gbmv}, {"gbmv_batched", testname_gbmv_batched}, {"gbmv_strided_batched", testname_gbmv_strided_batched}, {"gemv", testname_gemv}, {"gemv_batched", testname_gemv_batched}, {"gemv_strided_batched", testname_gemv_strided_batched}, {"ger", testname_ger}, {"ger_batched", testname_ger_batched}, {"ger_strided_batched", testname_ger_strided_batched}, {"geru", testname_ger}, {"geru_batched", testname_ger_batched}, {"geru_strided_batched", testname_ger_strided_batched}, {"gerc", testname_ger}, {"gerc_batched", testname_ger_batched}, {"gerc_strided_batched", testname_ger_strided_batched}, {"hbmv", testname_hbmv}, {"hbmv_batched", testname_hbmv_batched}, {"hbmv_strided_batched", testname_hbmv_strided_batched}, {"hemv", testname_hemv}, {"hemv_batched", testname_hemv_batched}, {"hemv_strided_batched", testname_hemv_strided_batched}, {"her", testname_her}, {"her_batched", testname_her_batched}, {"her_strided_batched", testname_her_strided_batched}, {"her2", testname_her2}, {"her2_batched", testname_her2_batched}, {"her2_strided_batched", testname_her2_strided_batched}, {"hpmv", testname_hpmv}, {"hpmv_batched", testname_hpmv_batched}, {"hpmv_strided_batched", testname_hpmv_strided_batched}, {"hpr", testname_hpr}, {"hpr_batched", testname_hpr_batched}, {"hpr_strided_batched", testname_hpr_strided_batched}, {"hpr2", testname_hpr2}, {"hpr2_batched", testname_hpr2_batched}, {"hpr2_strided_batched", testname_hpr2_strided_batched}, {"sbmv", testname_sbmv}, {"sbmv_batched", testname_sbmv_batched}, {"sbmv_strided_batched", testname_sbmv_strided_batched}, {"spmv", testname_spmv}, {"spmv_batched", testname_spmv_batched}, {"spmv_strided_batched", testname_spmv_strided_batched}, {"spr", testname_spr}, {"spr_batched", testname_spr_batched}, {"spr_strided_batched", testname_spr_strided_batched}, {"spr2", testname_spr2}, {"spr2_batched", testname_spr2_batched}, {"spr2_strided_batched", testname_spr2_strided_batched}, {"symv", testname_symv}, {"symv_batched", testname_symv_batched}, {"symv_strided_batched", testname_symv_strided_batched}, {"syr", testname_syr}, {"syr_batched", testname_syr_batched}, {"syr_strided_batched", testname_syr_strided_batched}, {"syr2", testname_syr2}, {"syr2_batched", testname_syr2_batched}, {"syr2_strided_batched", testname_syr2_strided_batched}, {"tbmv", testname_tbmv}, {"tbmv_batched", testname_tbmv_batched}, {"tbmv_strided_batched", testname_tbmv_strided_batched}, {"tbsv", testname_tbsv}, {"tbsv_batched", testname_tbsv_batched}, {"tbsv_strided_batched", testname_tbsv_strided_batched}, {"tpmv", testname_tpmv}, {"tpmv_batched", testname_tpmv_batched}, {"tpmv_strided_batched", testname_tpmv_strided_batched}, {"tpsv", testname_tpsv}, {"tpsv_batched", testname_tpsv_batched}, {"tpsv_strided_batched", testname_tpsv_strided_batched}, {"trmv", testname_trmv}, {"trmv_batched", testname_trmv_batched}, {"trmv_strided_batched", testname_trmv_strided_batched}, {"trsv", testname_trsv}, {"trsv_batched", testname_trsv_batched}, {"trsv_strided_batched", testname_trsv_strided_batched}, // L3 {"dgmm", testname_dgmm}, {"dgmm_batched", testname_dgmm_batched}, {"dgmm_strided_batched", testname_dgmm_strided_batched}, {"geam", testname_geam}, {"geam_batched", testname_geam_batched}, {"geam_strided_batched", testname_geam_strided_batched}, {"gemm", testname_gemm}, {"gemm_batched", testname_gemm_batched}, {"gemm_strided_batched", 
testname_gemm_strided_batched}, {"gemm_ex", testname_gemm_ex}, {"gemm_batched_ex", testname_gemm_batched_ex}, {"gemm_strided_batched_ex", testname_gemm_strided_batched_ex}, {"hemm", testname_hemm}, {"hemm_batched", testname_hemm_batched}, {"hemm_strided_batched", testname_hemm_strided_batched}, {"herk", testname_herk}, {"herk_batched", testname_herk_batched}, {"herk_strided_batched", testname_herk_strided_batched}, {"her2k", testname_her2k}, {"her2k_batched", testname_her2k_batched}, {"her2k_strided_batched", testname_her2k_strided_batched}, {"herkx", testname_herkx}, {"herkx_batched", testname_herkx_batched}, {"herkx_strided_batched", testname_herkx_strided_batched}, {"symm", testname_symm}, {"symm_batched", testname_symm_batched}, {"symm_strided_batched", testname_symm_strided_batched}, {"syrk", testname_syrk}, {"syrk_batched", testname_syrk_batched}, {"syrk_strided_batched", testname_syrk_strided_batched}, {"syr2k", testname_syr2k}, {"syr2k_batched", testname_syr2k_batched}, {"syr2k_strided_batched", testname_syr2k_strided_batched}, {"syrkx", testname_syrkx}, {"syrkx_batched", testname_syrkx_batched}, {"syrkx_strided_batched", testname_syrkx_strided_batched}, {"trmm", testname_trmm}, {"trmm_batched", testname_trmm_batched}, {"trmm_strided_batched", testname_trmm_strided_batched}, {"trsm", testname_trsm}, {"trsm_batched", testname_trsm_batched}, {"trsm_strided_batched", testname_trsm_strided_batched}, {"trsm_ex", testname_trsm_ex}, {"trsm_batched_ex", testname_trsm_batched_ex}, {"trsm_strided_batched_ex", testname_trsm_strided_batched_ex}, {"trtri", testname_trtri}, {"trtri_batched", testname_trtri_batched}, {"trtri_strided_batched", testname_trtri_strided_batched}, #ifdef __HIP_PLATFORM_SOLVER__ {"geqrf", testname_geqrf}, {"geqrf_batched", testname_geqrf_batched}, {"geqrf_strided_batched", testname_geqrf_strided_batched}, {"getrf", testname_getrf}, {"getrf_batched", testname_getrf_batched}, {"getrf_strided_batched", testname_getrf_strided_batched}, {"getrf_npvt", testname_getrf_npvt}, {"getrf_npvt_batched", testname_getrf_npvt_batched}, {"getrf_npvt_strided_batched", testname_getrf_npvt_strided_batched}, {"getri_batched", testname_getri_batched}, {"getri_npvt_batched", testname_getri_npvt_batched}, {"getrs", testname_getrs}, {"getrs_batched", testname_getrs_batched}, {"getrs_strided_batched", testname_getrs_strided_batched}, {"gels", testname_gels}, {"gels_batched", testname_gels_batched}, {"gels_strided_batched", testname_gels_strided_batched}, #endif // Aux {"set_get_vector", testname_set_get_vector}, {"set_get_vector_async", testname_set_get_vector_async}, {"set_get_matrix", testname_set_get_matrix}, {"set_get_matrix_async", testname_set_get_matrix_async}, }; auto match = fmap.find(arg.function); if(match != fmap.end()) match->second(arg, name); } // Template to dispatch testing_gemm_ex for performance tests // When Ti == void or Ti == To == Tc == bfloat16, the test is marked invalid template struct perf_gemm_ex : hipblas_test_invalid { }; template struct perf_gemm_ex{} && !(std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"gemm_ex", testing_gemm_ex_template}, {"gemm_batched_ex", testing_gemm_batched_ex_template}, }; run_function(map, arg); } }; // Template to dispatch testing_gemm_strided_batched_ex for performance tests // When Ti == void or Ti == To == Tc == bfloat16, the test is marked invalid template struct perf_gemm_strided_batched_ex : hipblas_test_invalid { }; template struct 
perf_gemm_strided_batched_ex< Ti, To, Tc, std::enable_if_t{} && !(std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"gemm_strided_batched_ex", testing_gemm_strided_batched_ex_template}, }; run_function(map, arg); } }; template struct perf_blas : hipblas_test_invalid { }; template struct perf_blas{} || std::is_same{}>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map fmap = { // L1 {"asum", testing_asum}, {"asum_batched", testing_asum_batched}, {"asum_strided_batched", testing_asum_strided_batched}, {"axpy", testing_axpy}, {"axpy_batched", testing_axpy_batched}, {"axpy_strided_batched", testing_axpy_strided_batched}, {"copy", testing_copy}, {"copy_batched", testing_copy_batched}, {"copy_strided_batched", testing_copy_strided_batched}, {"dot", testing_dot}, {"dot_batched", testing_dot_batched}, {"dot_strided_batched", testing_dot_strided_batched}, {"iamax", testing_amax}, {"iamax_batched", testing_amax_batched}, {"iamax_strided_batched", testing_amax_strided_batched}, {"iamin", testing_amin}, {"iamin_batched", testing_amin_batched}, {"iamin_strided_batched", testing_amin_strided_batched}, {"nrm2", testing_nrm2}, {"nrm2_batched", testing_nrm2_batched}, {"nrm2_strided_batched", testing_nrm2_strided_batched}, {"rotg", testing_rotg}, {"rotg_batched", testing_rotg_batched}, {"rotg_strided_batched", testing_rotg_strided_batched}, {"rotm", testing_rotm}, {"rotm_batched", testing_rotm_batched}, {"rotm_strided_batched", testing_rotm_strided_batched}, {"rotmg", testing_rotmg}, {"rotmg_batched", testing_rotmg_batched}, {"rotmg_strided_batched", testing_rotmg_strided_batched}, {"swap", testing_swap}, {"swap_batched", testing_swap_batched}, {"swap_strided_batched", testing_swap_strided_batched}, {"scal", testing_scal}, {"scal_batched", testing_scal_batched}, {"scal_strided_batched", testing_scal_strided_batched}, // L2 {"gbmv", testing_gbmv}, {"gbmv_batched", testing_gbmv_batched}, {"gbmv_strided_batched", testing_gbmv_strided_batched}, {"gemv", testing_gemv}, {"gemv_batched", testing_gemv_batched}, {"gemv_strided_batched", testing_gemv_strided_batched}, {"ger", testing_ger}, {"ger_batched", testing_ger_batched}, {"ger_strided_batched", testing_ger_strided_batched}, {"sbmv", testing_sbmv}, {"sbmv_batched", testing_sbmv_batched}, {"sbmv_strided_batched", testing_sbmv_strided_batched}, {"spmv", testing_spmv}, {"spmv_batched", testing_spmv_batched}, {"spmv_strided_batched", testing_spmv_strided_batched}, {"spr", testing_spr}, {"spr_batched", testing_spr_batched}, {"spr_strided_batched", testing_spr_strided_batched}, {"spr2", testing_spr2}, {"spr2_batched", testing_spr2_batched}, {"spr2_strided_batched", testing_spr2_strided_batched}, {"symv", testing_symv}, {"symv_batched", testing_symv_batched}, {"symv_strided_batched", testing_symv_strided_batched}, {"syr", testing_syr}, {"syr_batched", testing_syr_batched}, {"syr_strided_batched", testing_syr_strided_batched}, {"syr2", testing_syr2}, {"syr2_batched", testing_syr2_batched}, {"syr2_strided_batched", testing_syr2_strided_batched}, {"tbmv", testing_tbmv}, {"tbmv_batched", testing_tbmv_batched}, {"tbmv_strided_batched", testing_tbmv_strided_batched}, {"tbsv", testing_tbsv}, {"tbsv_batched", testing_tbsv_batched}, {"tbsv_strided_batched", testing_tbsv_strided_batched}, {"tpmv", testing_tpmv}, {"tpmv_batched", testing_tpmv_batched}, {"tpmv_strided_batched", testing_tpmv_strided_batched}, {"tpsv", testing_tpsv}, {"tpsv_batched", 
testing_tpsv_batched}, {"tpsv_strided_batched", testing_tpsv_strided_batched}, {"trmv", testing_trmv}, {"trmv_batched", testing_trmv_batched}, {"trmv_strided_batched", testing_trmv_strided_batched}, {"trsv", testing_trsv}, {"trsv_batched", testing_trsv_batched}, {"trsv_strided_batched", testing_trsv_strided_batched}, // L3 {"geam", testing_geam}, {"geam_batched", testing_geam_batched}, {"geam_strided_batched", testing_geam_strided_batched}, {"dgmm", testing_dgmm}, {"dgmm_batched", testing_dgmm_batched}, {"dgmm_strided_batched", testing_dgmm_strided_batched}, {"trmm", testing_trmm}, {"trmm_batched", testing_trmm_batched}, {"trmm_strided_batched", testing_trmm_strided_batched}, {"gemm", testing_gemm}, {"gemm_batched", testing_gemm_batched}, {"gemm_strided_batched", testing_gemm_strided_batched}, {"symm", testing_symm}, {"symm_batched", testing_symm_batched}, {"symm_strided_batched", testing_symm_strided_batched}, {"syrk", testing_syrk}, {"syrk_batched", testing_syrk_batched}, {"syrk_strided_batched", testing_syrk_strided_batched}, {"syr2k", testing_syr2k}, {"syr2k_batched", testing_syr2k_batched}, {"syr2k_strided_batched", testing_syr2k_strided_batched}, {"trtri", testing_trtri}, {"trtri_batched", testing_trtri_batched}, {"trtri_strided_batched", testing_trtri_strided_batched}, {"syrkx", testing_syrkx}, {"syrkx_batched", testing_syrkx_batched}, {"syrkx_strided_batched", testing_syrkx_strided_batched}, {"trsm", testing_trsm}, {"trsm_ex", testing_trsm_ex}, {"trsm_batched", testing_trsm_batched}, {"trsm_batched_ex", testing_trsm_batched_ex}, {"trsm_strided_batched", testing_trsm_strided_batched}, {"trsm_strided_batched_ex", testing_trsm_strided_batched_ex}, #ifdef __HIP_PLATFORM_SOLVER__ {"geqrf", testing_geqrf}, {"geqrf_batched", testing_geqrf_batched}, {"geqrf_strided_batched", testing_geqrf_strided_batched}, {"getrf", testing_getrf}, {"getrf_batched", testing_getrf_batched}, {"getrf_strided_batched", testing_getrf_strided_batched}, {"getrf_npvt", testing_getrf_npvt}, {"getrf_npvt_batched", testing_getrf_npvt_batched}, {"getrf_npvt_strided_batched", testing_getrf_npvt_strided_batched}, {"getri_batched", testing_getri_batched}, {"getri_npvt_batched", testing_getri_npvt_batched}, {"getrs", testing_getrs}, {"getrs_batched", testing_getrs_batched}, {"getrs_strided_batched", testing_getrs_strided_batched}, {"gels", testing_gels}, {"gels_batched", testing_gels_batched}, {"gels_strided_batched", testing_gels_strided_batched}, #endif // Aux {"set_get_vector", testing_set_get_vector}, {"set_get_vector_async", testing_set_get_vector_async}, {"set_get_matrix", testing_set_get_matrix}, {"set_get_matrix_async", testing_set_get_matrix_async}, }; run_function(fmap, arg); } }; template struct perf_blas{}>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"dot", testing_dot}, {"dot_batched", testing_dot_batched}, {"dot_strided_batched", testing_dot_strided_batched}, }; run_function(map, arg); } }; template struct perf_blas{}>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"axpy", testing_axpy}, {"axpy_batched", testing_axpy_batched}, {"axpy_strided_batched", testing_axpy_strided_batched}, {"dot", testing_dot}, {"dot_batched", testing_dot_batched}, {"dot_strided_batched", testing_dot_strided_batched}, {"gemm", testing_gemm}, {"gemm_batched", testing_gemm_batched}, {"gemm_strided_batched", testing_gemm_strided_batched}, }; run_function(map, arg); } }; template struct perf_blas< T, U, std::enable_if_t{} || 
std::is_same{}>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { // L1 {"asum", testing_asum}, {"asum_batched", testing_asum_batched}, {"asum_strided_batched", testing_asum_strided_batched}, {"axpy", testing_axpy}, {"axpy_batched", testing_axpy_batched}, {"axpy_strided_batched", testing_axpy_strided_batched}, {"copy", testing_copy}, {"copy_batched", testing_copy_batched}, {"copy_strided_batched", testing_copy_strided_batched}, {"dot", testing_dot}, {"dot_batched", testing_dot_batched}, {"dot_strided_batched", testing_dot_strided_batched}, {"dotc", testing_dotc}, {"dotc_batched", testing_dotc_batched}, {"dotc_strided_batched", testing_dotc_strided_batched}, {"iamax", testing_amax}, {"iamax_batched", testing_amax_batched}, {"iamax_strided_batched", testing_amax_strided_batched}, {"iamin", testing_amin}, {"iamin_batched", testing_amin_batched}, {"iamin_strided_batched", testing_amin_strided_batched}, {"nrm2", testing_nrm2}, {"nrm2_batched", testing_nrm2_batched}, {"nrm2_strided_batched", testing_nrm2_strided_batched}, {"rotg", testing_rotg}, {"rotg_batched", testing_rotg_batched}, {"rotg_strided_batched", testing_rotg_strided_batched}, {"swap", testing_swap}, {"swap_batched", testing_swap_batched}, {"swap_strided_batched", testing_swap_strided_batched}, {"scal", testing_scal}, {"scal_batched", testing_scal_batched}, {"scal_strided_batched", testing_scal_strided_batched}, // L2 {"gemv", testing_gemv}, {"gemv_batched", testing_gemv_batched}, {"gemv_strided_batched", testing_gemv_strided_batched}, {"gbmv", testing_gbmv}, {"gbmv_batched", testing_gbmv_batched}, {"gbmv_strided_batched", testing_gbmv_strided_batched}, {"geru", testing_ger}, {"geru_batched", testing_ger_batched}, {"geru_strided_batched", testing_ger_strided_batched}, {"gerc", testing_ger}, {"gerc_batched", testing_ger_batched}, {"gerc_strided_batched", testing_ger_strided_batched}, {"hbmv", testing_hbmv}, {"hbmv_batched", testing_hbmv_batched}, {"hbmv_strided_batched", testing_hbmv_strided_batched}, {"hemv", testing_hemv}, {"hemv_batched", testing_hemv_batched}, {"hemv_strided_batched", testing_hemv_strided_batched}, {"her", testing_her}, {"her_batched", testing_her_batched}, {"her_strided_batched", testing_her_strided_batched}, {"her2", testing_her2}, {"her2_batched", testing_her2_batched}, {"her2_strided_batched", testing_her2_strided_batched}, {"hpmv", testing_hpmv}, {"hpmv_batched", testing_hpmv_batched}, {"hpmv_strided_batched", testing_hpmv_strided_batched}, {"hpr", testing_hpr}, {"hpr_batched", testing_hpr_batched}, {"hpr_strided_batched", testing_hpr_strided_batched}, {"hpr2", testing_hpr2}, {"hpr2_batched", testing_hpr2_batched}, {"hpr2_strided_batched", testing_hpr2_strided_batched}, {"spr", testing_spr}, {"spr_batched", testing_spr_batched}, {"spr_strided_batched", testing_spr_strided_batched}, {"symv", testing_symv}, {"symv_batched", testing_symv_batched}, {"symv_strided_batched", testing_symv_strided_batched}, {"syr", testing_syr}, {"syr_batched", testing_syr_batched}, {"syr_strided_batched", testing_syr_strided_batched}, {"syr2", testing_syr2}, {"syr2_batched", testing_syr2_batched}, {"syr2_strided_batched", testing_syr2_strided_batched}, {"tbmv", testing_tbmv}, {"tbmv_batched", testing_tbmv_batched}, {"tbmv_strided_batched", testing_tbmv_strided_batched}, {"tbsv", testing_tbsv}, {"tbsv_batched", testing_tbsv_batched}, {"tbsv_strided_batched", testing_tbsv_strided_batched}, {"tpmv", testing_tpmv}, {"tpmv_batched", testing_tpmv_batched}, {"tpmv_strided_batched", 
testing_tpmv_strided_batched}, {"tpsv", testing_tpsv}, {"tpsv_batched", testing_tpsv_batched}, {"tpsv_strided_batched", testing_tpsv_strided_batched}, {"trmv", testing_trmv}, {"trmv_batched", testing_trmv_batched}, {"trmv_strided_batched", testing_trmv_strided_batched}, {"trsv", testing_trsv}, {"trsv_batched", testing_trsv_batched}, {"trsv_strided_batched", testing_trsv_strided_batched}, // L3 {"dgmm", testing_dgmm}, {"dgmm_batched", testing_dgmm_batched}, {"dgmm_strided_batched", testing_dgmm_strided_batched}, {"geam", testing_geam}, {"geam_batched", testing_geam_batched}, {"geam_strided_batched", testing_geam_strided_batched}, {"gemm", testing_gemm}, {"gemm_batched", testing_gemm_batched}, {"gemm_strided_batched", testing_gemm_strided_batched}, {"hemm", testing_hemm}, {"hemm_batched", testing_hemm_batched}, {"hemm_strided_batched", testing_hemm_strided_batched}, {"herk", testing_herk}, {"herk_batched", testing_herk_batched}, {"herk_strided_batched", testing_herk_strided_batched}, {"her2k", testing_her2k}, {"her2k_batched", testing_her2k_batched}, {"her2k_strided_batched", testing_her2k_strided_batched}, {"herkx", testing_herkx}, {"herkx_batched", testing_herkx_batched}, {"herkx_strided_batched", testing_herkx_strided_batched}, {"symm", testing_symm}, {"symm_batched", testing_symm_batched}, {"symm_strided_batched", testing_symm_strided_batched}, {"syrk", testing_syrk}, {"syrk_batched", testing_syrk_batched}, {"syrk_strided_batched", testing_syrk_strided_batched}, {"syr2k", testing_syr2k}, {"syr2k_batched", testing_syr2k_batched}, {"syr2k_strided_batched", testing_syr2k_strided_batched}, {"trtri", testing_trtri}, {"trtri_batched", testing_trtri_batched}, {"trtri_strided_batched", testing_trtri_strided_batched}, {"syrkx", testing_syrkx}, {"syrkx_batched", testing_syrkx_batched}, {"syrkx_strided_batched", testing_syrkx_strided_batched}, {"trsm", testing_trsm}, {"trsm_batched", testing_trsm_batched}, {"trsm_strided_batched", testing_trsm_strided_batched}, {"trsm_ex", testing_trsm_ex}, {"trsm_batched_ex", testing_trsm_batched_ex}, {"trsm_strided_batched_ex", testing_trsm_strided_batched_ex}, {"trmm", testing_trmm}, {"trmm_batched", testing_trmm_batched}, {"trmm_strided_batched", testing_trmm_strided_batched}, #ifdef __HIP_PLATFORM_SOLVER__ {"geqrf", testing_geqrf}, {"geqrf_batched", testing_geqrf_batched}, {"geqrf_strided_batched", testing_geqrf_strided_batched}, {"getrf", testing_getrf}, {"getrf_batched", testing_getrf_batched}, {"getrf_strided_batched", testing_getrf_strided_batched}, {"getrf_npvt", testing_getrf_npvt}, {"getrf_npvt_batched", testing_getrf_npvt_batched}, {"getrf_npvt_strided_batched", testing_getrf_npvt_strided_batched}, {"getri_batched", testing_getri_batched}, {"getri_npvt_batched", testing_getri_npvt_batched}, {"getrs", testing_getrs}, {"getrs_batched", testing_getrs_batched}, {"getrs_strided_batched", testing_getrs_strided_batched}, {"gels", testing_gels}, {"gels_batched", testing_gels_batched}, {"gels_strided_batched", testing_gels_strided_batched}, #endif }; run_function(map, arg); } }; template struct perf_blas_axpy_ex : hipblas_test_invalid { }; template struct perf_blas_axpy_ex< Ta, Tx, Ty, Tex, std::enable_if_t<((std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && 
std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}))>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"axpy_ex", testing_axpy_ex_template}, {"axpy_batched_ex", testing_axpy_batched_ex_template}, {"axpy_strided_batched_ex", testing_axpy_strided_batched_ex_template}, }; run_function(map, arg); } }; template struct perf_blas_dot_ex : hipblas_test_invalid { }; template struct perf_blas_dot_ex< Tx, Ty, Tr, Tex, std::enable_if_t<(std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"dot_ex", testing_dot_ex_template}, {"dot_batched_ex", testing_dot_batched_ex_template}, {"dot_strided_batched_ex", testing_dot_strided_batched_ex_template}, {"dotc_ex", testing_dot_ex_template}, {"dotc_batched_ex", testing_dot_batched_ex_template}, {"dotc_strided_batched_ex", testing_dot_strided_batched_ex_template}, }; run_function(map, arg); } }; template struct perf_blas_nrm2_ex : hipblas_test_invalid { }; template struct perf_blas_nrm2_ex< Tx, Tr, Tex, std::enable_if_t< (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"nrm2_ex", testing_nrm2_ex_template}, {"nrm2_batched_ex", testing_nrm2_batched_ex_template}, {"nrm2_strided_batched_ex", testing_nrm2_strided_batched_ex_template}, }; run_function(map, arg); } }; template struct perf_blas_rot_ex : hipblas_test_invalid { }; template struct perf_blas_rot_ex< Tx, Ty, Tcs, Tex, std::enable_if_t<(std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"rot_ex", testing_rot_ex_template}, {"rot_batched_ex", testing_rot_batched_ex_template}, {"rot_strided_batched_ex", testing_rot_strided_batched_ex_template}, }; run_function(map, arg); } }; template struct perf_blas_rot : hipblas_test_invalid { }; template struct perf_blas_rot< Ti, To, Tc, std::enable_if_t<(std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} 
&& std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"rot", testing_rot}, {"rot_batched", testing_rot_batched}, {"rot_strided_batched", testing_rot_strided_batched}, }; run_function(map, arg); } }; template struct perf_blas_scal : hipblas_test_invalid { }; template struct perf_blas_scal< Ta, Tb, std::enable_if_t<(std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"scal", testing_scal}, {"scal_batched", testing_scal_batched}, {"scal_strided_batched", testing_scal_strided_batched}, }; run_function(map, arg); } }; template struct perf_blas_scal_ex : hipblas_test_invalid { }; template struct perf_blas_scal_ex< Ta, Tx, Tex, std::enable_if_t< (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"scal_ex", testing_scal_ex_template}, {"scal_batched_ex", testing_scal_batched_ex_template}, {"scal_strided_batched_ex", testing_scal_strided_batched_ex_template}, }; run_function(map, arg); } }; int run_bench_test(Arguments& arg, int unit_check, int timing) { //hipblas_initialize(); // Initialize rocBLAS std::cout << std::setiosflags(std::ios::fixed) << std::setprecision(7); // Set precision to 7 digits // disable unit_check in client benchmark, it is only used in gtest unit test arg.unit_check = unit_check; // enable timing check,otherwise no performance data collected arg.timing = timing; // Skip past any testing_ prefix in function static constexpr char prefix[] = "testing_"; const char* function = arg.function; if(!strncmp(function, prefix, sizeof(prefix) - 1)) function += sizeof(prefix) - 1; if(!strcmp(function, "gemm") || !strcmp(function, "gemm_batched")) { // adjust dimension for GEMM routines hipblas_int min_lda = arg.transA == 'N' ? arg.M : arg.K; hipblas_int min_ldb = arg.transB == 'N' ? arg.K : arg.N; hipblas_int min_ldc = arg.M; if(arg.lda < min_lda) { std::cout << "hipblas-bench INFO: lda < min_lda, set lda = " << min_lda << std::endl; arg.lda = min_lda; } if(arg.ldb < min_ldb) { std::cout << "hipblas-bench INFO: ldb < min_ldb, set ldb = " << min_ldb << std::endl; arg.ldb = min_ldb; } if(arg.ldc < min_ldc) { std::cout << "hipblas-bench INFO: ldc < min_ldc, set ldc = " << min_ldc << std::endl; arg.ldc = min_ldc; } } else if(!strcmp(function, "gemm_strided_batched")) { // adjust dimension for GEMM routines hipblas_int min_lda = arg.transA == 'N' ? arg.M : arg.K; hipblas_int min_ldb = arg.transB == 'N' ? 
arg.K : arg.N; hipblas_int min_ldc = arg.M; if(arg.lda < min_lda) { std::cout << "hipblas-bench INFO: lda < min_lda, set lda = " << min_lda << std::endl; arg.lda = min_lda; } if(arg.ldb < min_ldb) { std::cout << "hipblas-bench INFO: ldb < min_ldb, set ldb = " << min_ldb << std::endl; arg.ldb = min_ldb; } if(arg.ldc < min_ldc) { std::cout << "hipblas-bench INFO: ldc < min_ldc, set ldc = " << min_ldc << std::endl; arg.ldc = min_ldc; } // hipblas_int min_stride_a = // arg.transA == 'N' ? arg.K * arg.lda : arg.M * arg.lda; // hipblas_int min_stride_b = // arg.transB == 'N' ? arg.N * arg.ldb : arg.K * arg.ldb; // hipblas_int min_stride_a = // arg.transA == 'N' ? arg.K * arg.lda : arg.M * arg.lda; // hipblas_int min_stride_b = // arg.transB == 'N' ? arg.N * arg.ldb : arg.K * arg.ldb; hipblas_int min_stride_c = arg.ldc * arg.N; // if (arg.stride_a < min_stride_a) // { // std::cout << "hipblas-bench INFO: stride_a < min_stride_a, set stride_a = " << // min_stride_a << std::endl; // arg.stride_a = min_stride_a; // } // if (arg.stride_b < min_stride_b) // { // std::cout << "hipblas-bench INFO: stride_b < min_stride_b, set stride_b = " << // min_stride_b << std::endl; // arg.stride_b = min_stride_b; // } if(arg.stride_c < min_stride_c) { std::cout << "hipblas-bench INFO: stride_c < min_stride_c, set stride_c = " << min_stride_c << std::endl; arg.stride_c = min_stride_c; } } if(!strcmp(function, "gemm_ex") || !strcmp(function, "gemm_batched_ex")) { // adjust dimension for GEMM routines hipblas_int min_lda = arg.transA == 'N' ? arg.M : arg.K; hipblas_int min_ldb = arg.transB == 'N' ? arg.K : arg.N; hipblas_int min_ldc = arg.M; hipblas_int min_ldd = arg.M; if(arg.lda < min_lda) { std::cout << "hipblas-bench INFO: lda < min_lda, set lda = " << min_lda << std::endl; arg.lda = min_lda; } if(arg.ldb < min_ldb) { std::cout << "hipblas-bench INFO: ldb < min_ldb, set ldb = " << min_ldb << std::endl; arg.ldb = min_ldb; } if(arg.ldc < min_ldc) { std::cout << "hipblas-bench INFO: ldc < min_ldc, set ldc = " << min_ldc << std::endl; arg.ldc = min_ldc; } if(arg.ldd < min_ldd) { std::cout << "hipblas-bench INFO: ldd < min_ldd, set ldd = " << min_ldc << std::endl; arg.ldd = min_ldd; } hipblas_gemm_dispatch(arg); } else if(!strcmp(function, "gemm_strided_batched_ex")) { // adjust dimension for GEMM routines hipblas_int min_lda = arg.transA == 'N' ? arg.M : arg.K; hipblas_int min_ldb = arg.transB == 'N' ? 
arg.K : arg.N; hipblas_int min_ldc = arg.M; hipblas_int min_ldd = arg.M; if(arg.lda < min_lda) { std::cout << "hipblas-bench INFO: lda < min_lda, set lda = " << min_lda << std::endl; arg.lda = min_lda; } if(arg.ldb < min_ldb) { std::cout << "hipblas-bench INFO: ldb < min_ldb, set ldb = " << min_ldb << std::endl; arg.ldb = min_ldb; } if(arg.ldc < min_ldc) { std::cout << "hipblas-bench INFO: ldc < min_ldc, set ldc = " << min_ldc << std::endl; arg.ldc = min_ldc; } if(arg.ldd < min_ldd) { std::cout << "hipblas-bench INFO: ldd < min_ldd, set ldd = " << min_ldc << std::endl; arg.ldd = min_ldd; } hipblas_int min_stride_c = arg.ldc * arg.N; if(arg.stride_c < min_stride_c) { std::cout << "hipblas-bench INFO: stride_c < min_stride_c, set stride_c = " << min_stride_c << std::endl; arg.stride_c = min_stride_c; } hipblas_gemm_dispatch(arg); } else { if(!strcmp(function, "scal_ex") || !strcmp(function, "scal_batched_ex") || !strcmp(function, "scal_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); /* if(!strcmp(function, "scal") || !strcmp(function, "scal_batched") || !strcmp(function, "scal_strided_batched")) hipblas_blas1_dispatch(arg); */ else if(!strcmp(function, "rot") || !strcmp(function, "rot_batched") || !strcmp(function, "rot_strided_batched")) hipblas_rot_dispatch(arg); else if(!strcmp(function, "axpy_ex") || !strcmp(function, "axpy_batched_ex") || !strcmp(function, "axpy_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); else if(!strcmp(function, "dot_ex") || !strcmp(function, "dot_batched_ex") || !strcmp(function, "dot_strided_batched_ex") || !strcmp(function, "dotc_ex") || !strcmp(function, "dotc_batched_ex") || !strcmp(function, "dotc_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); else if(!strcmp(function, "nrm2_ex") || !strcmp(function, "nrm2_batched_ex") || !strcmp(function, "nrm2_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); else if(!strcmp(function, "rot_ex") || !strcmp(function, "rot_batched_ex") || !strcmp(function, "rot_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); else hipblas_simple_dispatch(arg); } return 0; } hipBLAS-rocm-5.5.1/clients/common/hipblas_arguments.cpp000066400000000000000000000103321434647641600230640ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
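// --- Editor's note: hedged illustration, not part of the hipBLAS sources. ---
// The run_bench_test() logic above clamps user-supplied leading dimensions to the
// smallest values that are legal for column-major GEMM given transA/transB, printing
// an INFO message whenever it adjusts one. The sketch below restates that clamping
// rule in a self-contained form; GemmDims and clamp_gemm_leading_dims are illustrative
// names, not types or functions from the hipBLAS clients.

#include <iostream>

// Illustrative stand-in for the benchmark's Arguments fields.
struct GemmDims
{
    int  M, N, K;
    int  lda, ldb, ldc;
    char transA, transB;
};

// Clamp leading dimensions to the smallest values valid for column-major GEMM.
inline void clamp_gemm_leading_dims(GemmDims& d)
{
    const int min_lda = (d.transA == 'N') ? d.M : d.K;
    const int min_ldb = (d.transB == 'N') ? d.K : d.N;
    const int min_ldc = d.M;

    if(d.lda < min_lda)
    {
        std::cout << "INFO: lda < min_lda, set lda = " << min_lda << '\n';
        d.lda = min_lda;
    }
    if(d.ldb < min_ldb)
    {
        std::cout << "INFO: ldb < min_ldb, set ldb = " << min_ldb << '\n';
        d.ldb = min_ldb;
    }
    if(d.ldc < min_ldc)
    {
        std::cout << "INFO: ldc < min_ldc, set ldc = " << min_ldc << '\n';
        d.ldc = min_ldc;
    }
}

int main()
{
    GemmDims d{128, 64, 32, 1, 1, 1, 'N', 'T'}; // deliberately too-small ld*
    clamp_gemm_leading_dims(d);                 // lda -> 128, ldb -> 64, ldc -> 128
    std::cout << d.lda << ' ' << d.ldb << ' ' << d.ldc << '\n';
    return 0;
}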
* * ************************************************************************ */ #include "hipblas_arguments.hpp" #include "tuple_helper.hpp" #include #include #include #include #include // Pairs for YAML output template std::ostream& operator<<(std::ostream& os, std::pair p) { os << p.first << ": "; os << p.second; return os; } // Function to print Arguments out to stream in YAML format std::ostream& operator<<(std::ostream& os, const Arguments& arg) { // delim starts as "{ " and becomes ", " afterwards auto print_pair = [&, delim = "{ "](const char* name, const auto& value) mutable { os << delim << std::make_pair(name, value); delim = ", "; }; // Print each (name, value) tuple pair #define NAME_VALUE_PAIR(NAME) print_pair(#NAME, arg.NAME) FOR_EACH_ARGUMENT(NAME_VALUE_PAIR, ;); // Closing brace return os << " }\n"; } // Google Tests uses this automatically with std::ostream to dump parameters /* std::ostream& operator<<(std::ostream& os, const Arguments& arg) { std::ostream oss; // Print to std::ostream, then transfer to std::ostream return os << arg; }*/ // Function to read Structures data from stream std::istream& operator>>(std::istream& is, Arguments& arg) { is.read(reinterpret_cast(&arg), sizeof(arg)); return is; } // Error message about incompatible binary file format static void validation_error [[noreturn]] (const char* name) { std::cerr << "Arguments field \"" << name << "\" does not match format.\n\n" "Fatal error: Binary test data does match input format.\n" "Ensure that hipblas_arguments.hpp and hipblas_common.yaml\n" "define exactly the same Arguments, that hipblas_gentest.py\n" "generates the data correctly, and that endianness is the same." << std::endl; abort(); } // hipblas_gentest.py is expected to conform to this format. // hipblas_gentest.py uses hipblas_common.yaml to generate this format. void Arguments::validate(std::istream& ifs) { char header[8]{}, trailer[8]{}; Arguments arg{}; ifs.read(header, sizeof(header)); ifs >> arg; ifs.read(trailer, sizeof(trailer)); if(strcmp(header, "hipBLAS")) validation_error("header"); if(strcmp(trailer, "HIPblas")) validation_error("trailer"); auto check_func = [sig = 0u](const char* name, const auto& value) mutable { static_assert(sizeof(value) <= 256, "Fatal error: Arguments field is too large (greater than 256 bytes)."); for(size_t i = 0; i < sizeof(value); ++i) { if(reinterpret_cast(&value)[i] ^ sig ^ i) validation_error(name); } sig = (sig + 89) % 256; }; // Apply check_func to each pair (name, value) of Arguments as a tuple #define CHECK_FUNC(NAME) check_func(#NAME, arg.NAME) FOR_EACH_ARGUMENT(CHECK_FUNC, ;); } hipBLAS-rocm-5.5.1/clients/common/hipblas_datatype2string.cpp000066400000000000000000000125451434647641600242130ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
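// --- Editor's note: hedged illustration, not part of the hipBLAS sources. ---
// The operator<< in hipblas_arguments.cpp above prints every (name, value) pair of
// Arguments as one YAML-style map line, using a mutable lambda whose delimiter flips
// from "{ " to ", " after the first field; the real code generates the print_pair
// calls with the FOR_EACH_ARGUMENT macro. Minimal sketch of the same pattern with a
// made-up MiniArgs struct (not the real Arguments type):

#include <iostream>

// Hypothetical stand-in for the Arguments structure.
struct MiniArgs
{
    int    M     = 128;
    int    N     = 64;
    double alpha = 1.5;
};

std::ostream& operator<<(std::ostream& os, const MiniArgs& arg)
{
    // delim starts as "{ " and becomes ", " after the first pair is printed
    auto print_pair = [&, delim = "{ "](const char* name, const auto& value) mutable {
        os << delim << name << ": " << value;
        delim = ", ";
    };

    print_pair("M", arg.M);
    print_pair("N", arg.N);
    print_pair("alpha", arg.alpha);

    return os << " }\n"; // closing brace, one record per line
}

int main()
{
    std::cout << MiniArgs{}; // prints: { M: 128, N: 64, alpha: 1.5 }
    return 0;
}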
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * ************************************************************************ */ #include "hipblas_datatype2string.hpp" #include "hipblas.h" // clang-format off hipblas_initialization string2hipblas_initialization(const std::string& value) { return value == "rand_int" ? hipblas_initialization::rand_int : value == "trig_float" ? hipblas_initialization::trig_float : value == "hpl" ? hipblas_initialization::hpl : static_cast(0); // invalid enum } // clang-format on /* ============================================================================================ */ /* Convert hipblas constants to lapack char. */ char hipblas2char_operation(hipblasOperation_t value) { switch(value) { case HIPBLAS_OP_N: return 'N'; case HIPBLAS_OP_T: return 'T'; case HIPBLAS_OP_C: return 'C'; } return '\0'; } char hipblas2char_fill(hipblasFillMode_t value) { switch(value) { case HIPBLAS_FILL_MODE_UPPER: return 'U'; case HIPBLAS_FILL_MODE_LOWER: return 'L'; case HIPBLAS_FILL_MODE_FULL: return 'F'; } return '\0'; } char hipblas2char_diagonal(hipblasDiagType_t value) { switch(value) { case HIPBLAS_DIAG_UNIT: return 'U'; case HIPBLAS_DIAG_NON_UNIT: return 'N'; } return '\0'; } char hipblas2char_side(hipblasSideMode_t value) { switch(value) { case HIPBLAS_SIDE_LEFT: return 'L'; case HIPBLAS_SIDE_RIGHT: return 'R'; case HIPBLAS_SIDE_BOTH: return 'B'; } return '\0'; } /* ============================================================================================ */ /* Convert lapack char constants to hipblas type. */ hipblasOperation_t char2hipblas_operation(char value) { switch(value) { case 'N': return HIPBLAS_OP_N; case 'T': return HIPBLAS_OP_T; case 'C': return HIPBLAS_OP_C; case 'n': return HIPBLAS_OP_N; case 't': return HIPBLAS_OP_T; case 'c': return HIPBLAS_OP_C; } return HIPBLAS_OP_N; } hipblasFillMode_t char2hipblas_fill(char value) { switch(value) { case 'U': return HIPBLAS_FILL_MODE_UPPER; case 'L': return HIPBLAS_FILL_MODE_LOWER; case 'u': return HIPBLAS_FILL_MODE_UPPER; case 'l': return HIPBLAS_FILL_MODE_LOWER; } return HIPBLAS_FILL_MODE_LOWER; } hipblasDiagType_t char2hipblas_diagonal(char value) { switch(value) { case 'U': return HIPBLAS_DIAG_UNIT; case 'N': return HIPBLAS_DIAG_NON_UNIT; case 'u': return HIPBLAS_DIAG_UNIT; case 'n': return HIPBLAS_DIAG_NON_UNIT; } return HIPBLAS_DIAG_NON_UNIT; } hipblasSideMode_t char2hipblas_side(char value) { switch(value) { case 'L': return HIPBLAS_SIDE_LEFT; case 'R': return HIPBLAS_SIDE_RIGHT; case 'l': return HIPBLAS_SIDE_LEFT; case 'r': return HIPBLAS_SIDE_RIGHT; } return HIPBLAS_SIDE_LEFT; } // clang-format off hipblasDatatype_t string2hipblas_datatype(const std::string& value) { return value == "f16_r" || value == "h" ? HIPBLAS_R_16F : value == "f32_r" || value == "s" ? HIPBLAS_R_32F : value == "f64_r" || value == "d" ? HIPBLAS_R_64F : value == "bf16_r" ? HIPBLAS_R_16B : value == "f16_c" ? HIPBLAS_C_16B : value == "f32_c" || value == "c" ? HIPBLAS_C_32F : value == "f64_c" || value == "z" ? HIPBLAS_C_64F : value == "bf16_c" ? HIPBLAS_C_16B : value == "i8_r" ? HIPBLAS_R_8I : value == "i32_r" ? 
HIPBLAS_R_32I : value == "i8_c" ? HIPBLAS_C_8I : value == "i32_c" ? HIPBLAS_C_32I : value == "u8_r" ? HIPBLAS_R_8U : value == "u32_r" ? HIPBLAS_R_32U : value == "u8_c" ? HIPBLAS_C_8U : value == "u32_c" ? HIPBLAS_C_32U : HIPBLAS_DATATYPE_INVALID; } // clang-format on hipBLAS-rocm-5.5.1/clients/common/hipblas_gentest.py000077500000000000000000000575741434647641600224240ustar00rootroot00000000000000#!/usr/bin/python3 """Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import re import sys import os import argparse import ctypes from fnmatch import fnmatchcase try: # Import either the C or pure-Python YAML parser from yaml import CLoader as Loader except ImportError: from yaml import Loader import yaml # Regex for type names in the YAML file. Optional *nnn indicates array. 
TYPE_RE = re.compile(r'[a-z_A-Z]\w*(:?\s*\*\s*\d+)?$')

# Regex for integer ranges A..B[..C]
INT_RANGE_RE = re.compile(
    r'\s*(-?\d+)\s*\.\.\s*(-?\d+)\s*(?:\.\.\s*(-?\d+)\s*)?$')

# Regex for include: YAML extension
INCLUDE_RE = re.compile(r'include\s*:\s*(.*)')

args = {}
testcases = set()
datatypes = {}
param = {}


def main():
    args.update(parse_args().__dict__)
    for doc in get_yaml_docs():
        process_doc(doc)


def process_doc(doc):
    """Process one document in the YAML file"""
    # Ignore empty documents
    if not doc or not doc.get('Tests'):
        return

    # Clear datatypes and params from previous documents
    datatypes.clear()
    param.clear()

    # Return dictionary of all known datatypes
    datatypes.update(get_datatypes(doc))

    # Arguments structure corresponding to C/C++ structure
    param['Arguments'] = type('Arguments', (ctypes.Structure,),
                              {'_fields_': get_arguments(doc)})

    # Special names which get expanded as lists of arguments
    param['dict_lists_to_expand'] = doc.get('Dictionary lists to expand') or ()

    # Lists which are not expanded
    param['lists_to_not_expand'] = doc.get('Lists to not expand') or ()

    # Defaults
    defaults = doc.get('Defaults') or {}

    # Known Bugs
    param['known_bugs'] = doc.get('Known bugs') or []

    # Functions
    param['Functions'] = doc.get('Functions') or {}

    # Instantiate all of the tests, starting with defaults
    for test in doc['Tests']:
        case = defaults.copy()
        case.update(test)
        generate(case, instantiate)


def parse_args():
    """Parse command-line arguments, returning input and output files"""
    parser = argparse.ArgumentParser(description="""
Expand hipBLAS YAML test data file into binary Arguments records
""")
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o', '--out', dest='outfile',
                        type=argparse.FileType('wb'), default=sys.stdout)
    parser.add_argument('-I', help="Add include path", action='append',
                        dest='includes', default=[])
    parser.add_argument('-t', '--template', type=argparse.FileType('r'))
    return parser.parse_args()


def read_yaml_file(file):
    """Read the YAML file, processing include: lines as an extension"""
    file_dir = os.path.dirname(file.name) or os.getcwd()
    source = []
    for line_no, line in enumerate(file, start=1):
        # Keep track of file names and line numbers for each line of YAML
        match = line.startswith('include') and INCLUDE_RE.match(line)
        if not match:
            source.append([line, file.name, line_no])
        else:
            include_file = match.group(1)
            include_dirs = [file_dir] + args['includes']
            for path in include_dirs:
                path = os.path.join(path, include_file)
                if os.path.exists(path):
                    source.extend(read_yaml_file(open(path, 'r')))
                    break
            else:
                sys.exit("In file " + file.name + ", line " + str(line_no) +
                         ", column " + str(match.start(1)+1) + ":\n" +
                         line.rstrip() + "\n" + " " * match.start(1) +
                         "^\nCannot open " + include_file +
                         "\n\nInclude paths:\n" + "\n".join(include_dirs))
    file.close()
    return source


def get_yaml_docs():
    """Parse the YAML file"""
    source = read_yaml_file(args['infile'])

    if args.get('template'):
        source = read_yaml_file(args['template']) + source

    source_str = ''.join([line[0] for line in source])

    def mark_str(mark):
        line = source[mark.line]
        return("In file " + line[1] + ", line " + str(line[2]) + ", column " +
               str(mark.column + 1) + ":\n" + line[0].rstrip() + "\n" +
               ' ' * mark.column + "^\n")

    # We iterate through all of the documents to properly diagnose errors,
    # because the load_all generator does not handle exceptions correctly.
docs = [] load = Loader(source_str) while load.check_data(): try: doc = load.get_data() except yaml.YAMLError as err: sys.exit((mark_str(err.problem_mark) if err.problem_mark else "") + (err.problem + "\n" if err.problem else "") + (err.note + "\n" if err.note else "")) else: docs.append(doc) return docs def get_datatypes(doc): """ Get datatypes from YAML doc""" dt = ctypes.__dict__.copy() for declaration in doc.get('Datatypes') or (): for name, decl in declaration.items(): if isinstance(decl, dict): # Create derived class type based on bases and attr entries dt[name] = type(name, tuple([eval(t, dt) for t in decl.get('bases') or () if TYPE_RE.match(t)] ), decl.get('attr') or {}) # Import class' attributes into the datatype namespace for subtype in decl.get('attr') or {}: if TYPE_RE.match(subtype): dt[subtype] = eval(name+'.'+subtype, dt) elif isinstance(decl, str) and TYPE_RE.match(decl): dt[name] = dt[decl] else: sys.exit("Unrecognized data type "+name+": "+repr(decl)) return dt def get_arguments(doc): """The kernel argument list, with argument names and types""" return [(var, eval(decl[var], datatypes)) for decl in doc.get('Arguments') or () if len(decl) == 1 for var in decl if TYPE_RE.match(decl[var])] def setkey_product(test, key, vals): """Helper for setdefaults. Tests that all values in vals is present in test, if so then sets test[key] to product of all test[vals].""" if all(x in test for x in vals): result = 1 for x in vals: if x in ('incx', 'incy'): result *= abs(test[x]) else: result *= test[x] test[key] = int(result) def setdefaults(test): """Set default values for parameters""" # Do not put constant defaults here -- use hipblas_common.yaml for that. # These are only for dynamic defaults # TODO: This should be ideally moved to YAML file, with eval'd expressions. # TODO: move to use hipblas names and decide if we want any auto defaults or just yaml if test['function'] in ('asum_strided_batched', 'nrm2_strided_batched', 'scal_strided_batched', 'swap_strided_batched', 'copy_strided_batched', 'dot_strided_batched', 'dotc_strided_batched', 'dot_strided_batched_ex', 'dotc_strided_batched_ex', 'rot_strided_batched', 'rot_strided_batched_ex', 'rotm_strided_batched', 'iamax_strided_batched', 'iamin_strided_batched', 'axpy_strided_batched', 'axpy_strided_batched_ex', 'nrm2_strided_batched_ex', 'scal_strided_batched_ex'): setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale']) setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale']) # we are using stride_c for param in rotm if all([x in test for x in ('stride_scale')]): test.setdefault('stride_c', int(test['stride_scale']) * 5) elif test['function'] in ('tpmv_strided_batched'): setkey_product(test, 'stride_x', ['M', 'incx', 'stride_scale']) # Let's use M * M (> (M * (M+1)) / 2) as a 'stride' size for the packed format. 
setkey_product(test, 'stride_a', ['M', 'M', 'stride_scale']) elif test['function'] in ('trmv_strided_batched'): setkey_product(test, 'stride_x', ['M', 'incx', 'stride_scale']) setkey_product(test, 'stride_a', ['M', 'lda', 'stride_scale']) elif test['function'] in ('gemv_strided_batched', 'gbmv_strided_batched', 'ger_strided_batched', 'geru_strided_batched', 'gerc_strided_batched', 'trsv_strided_batched'): if test['function'] in ('ger_strided_batched', 'geru_strided_batched', 'gerc_strided_batched', 'trsv_strided_batched' ) or test['transA'] in ('T', 'C'): setkey_product(test, 'stride_x', ['M', 'incx', 'stride_scale']) setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale']) else: setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale']) setkey_product(test, 'stride_y', ['M', 'incy', 'stride_scale']) if test['function'] in ('gbmv_strided_batched'): setkey_product(test, 'stride_a', ['lda', 'N', 'stride_scale']) elif test['function'] in ('hemv_strided_batched', 'hbmv_strided_batched'): if all([x in test for x in ('N', 'incx', 'incy', 'stride_scale')]): setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale']) setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale']) setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale']) elif test['function'] in ('hpmv_strided_batched'): if all([x in test for x in ('N', 'incx', 'incy', 'stride_scale')]): setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale']) setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale']) ldN = int((test['N'] * (test['N'] + 1) * test['stride_scale']) / 2) test.setdefault('stride_a', ldN) elif test['function'] in ('spr_strided_batched', 'spr2_strided_batched', 'hpr_strided_batched', 'hpr2_strided_batched', 'tpsv_strided_batched'): setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale']) setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale']) setkey_product(test, 'stride_a', ['N', 'N', 'stride_scale']) elif test['function'] in ('her_strided_batched', 'her2_strided_batched', 'syr2_strided_batched'): setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale']) setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale']) setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale']) # we are using stride_c for arg c and stride_d for arg s in rotg # these are are single values for each batch elif test['function'] in ('rotg_strided_batched'): if 'stride_scale' in test: test.setdefault('stride_a', int(test['stride_scale'])) test.setdefault('stride_b', int(test['stride_scale'])) test.setdefault('stride_c', int(test['stride_scale'])) test.setdefault('stride_d', int(test['stride_scale'])) # we are using stride_a for d1, stride_b for d2, and stride_c for param in # rotmg. 
These are are single values for each batch, except param which is # a 5 element array elif test['function'] in ('rotmg_strided_batched'): if 'stride_scale' in test: test.setdefault('stride_a', int(test['stride_scale'])) test.setdefault('stride_b', int(test['stride_scale'])) test.setdefault('stride_c', int(test['stride_scale']) * 5) test.setdefault('stride_x', int(test['stride_scale'])) test.setdefault('stride_y', int(test['stride_scale'])) elif test['function'] in ('dgmm_strided_batched'): setkey_product(test, 'stride_c', ['N', 'ldc', 'stride_scale']) setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale']) if test['side'].upper() == 'L': setkey_product(test, 'stride_x', ['M', 'incx', 'stride_scale']) else: setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale']) elif test['function'] in ('geam_strided_batched'): setkey_product(test, 'stride_c', ['N', 'ldc', 'stride_scale']) if test['transA'].upper() == 'N': setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale']) else: setkey_product(test, 'stride_a', ['M', 'lda', 'stride_scale']) if test['transB'].upper() == 'N': setkey_product(test, 'stride_b', ['N', 'ldb', 'stride_scale']) else: setkey_product(test, 'stride_b', ['M', 'ldb', 'stride_scale']) elif test['function'] in ('trmm_strided_batched'): setkey_product(test, 'stride_b', ['N', 'ldb', 'stride_scale']) if test['side'].upper() == 'L': setkey_product(test, 'stride_a', ['M', 'lda', 'stride_scale']) else: setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale']) elif test['function'] in ('trsm_strided_batched', 'trsm_strided_batched_ex'): setkey_product(test, 'stride_b', ['N', 'ldb', 'stride_scale']) if test['side'].upper() == 'L': setkey_product(test, 'stride_a', ['M', 'lda', 'stride_scale']) else: setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale']) elif test['function'] in ('tbmv_strided_batched'): if all([x in test for x in ('M', 'lda', 'stride_scale')]): ldM = int(test['M'] * test['lda'] * test['stride_scale']) test.setdefault('stride_a', ldM) if all([x in test for x in ('M', 'incx', 'stride_scale')]): ldx = int(test['M'] * abs(test['incx']) * test['stride_scale']) test.setdefault('stride_x', ldx) elif test['function'] in ('tbsv_strided_batched'): setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale']) setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale']) test.setdefault('stride_x', 0) test.setdefault('stride_y', 0) if test['transA'] == '*' or test['transB'] == '*': test.setdefault('lda', 0) test.setdefault('ldb', 0) test.setdefault('ldc', 0) test.setdefault('ldd', 0) else: test.setdefault('lda', test['M'] if test['transA'].upper() == 'N' else test['K'] if test['K'] != 0 else 1) test.setdefault('ldb', test['K'] if test['K'] != 0 else 1 if test['transB'].upper() == 'N' else test['N']) test.setdefault('ldc', test['M']) test.setdefault('ldd', test['M']) if test['batch_count'] > 0: test.setdefault('stride_a', test['lda'] * (test['K'] if test['transA'].upper() == 'N' else test['M'])) test.setdefault('stride_b', test['ldb'] * (test['N'] if test['transB'].upper() == 'N' else test['K'])) test.setdefault('stride_c', test['ldc'] * test['N']) test.setdefault('stride_d', test['ldd'] * test['N']) return test.setdefault('stride_a', 0) test.setdefault('stride_b', 0) test.setdefault('stride_c', 0) test.setdefault('stride_d', 0) def write_signature(out): """Write the signature used to verify binary file compatibility""" if 'signature_written' not in args: sig = 0 byt = bytearray("hipBLAS", 'utf_8') byt.append(0) last_ofs = 0 for (name, ctype) in 
param['Arguments']._fields_: member = getattr(param['Arguments'], name) for i in range(0, member.offset - last_ofs): byt.append(0) for i in range(0, member.size): byt.append(sig ^ i) sig = (sig + 89) % 256 last_ofs = member.offset + member.size for i in range(0, ctypes.sizeof(param['Arguments']) - last_ofs): byt.append(0) byt.extend(bytes("HIPblas", 'utf_8')) byt.append(0) out.write(byt) args['signature_written'] = True def write_test(test): """Write the test case out to the binary file if not seen already""" # For each argument declared in arguments, we generate a positional # argument in the Arguments constructor. For strings, we pass the # value of the string directly. For arrays, we unpack their contents # into the ctype array constructor and pass the ctype array. For # scalars, we coerce the string/numeric value into ctype. arg = [] for name, ctype in param['Arguments']._fields_: try: if issubclass(ctype, ctypes.Array): if issubclass(ctype._type_, ctypes.c_char): arg.append(bytes(test[name], 'utf_8')) else: arg.append(ctype(*test[name])) elif issubclass(ctype, ctypes.c_char): arg.append(bytes(test[name], 'utf_8')) else: arg.append(ctype(test[name])) except TypeError as err: sys.exit("TypeError: " + str(err) + " for " + name + ", which has type " + str(type(test[name])) + "\n") byt = bytes(param['Arguments'](*arg)) if byt not in testcases: testcases.add(byt) write_signature(args['outfile']) args['outfile'].write(byt) def instantiate(test): """Instantiate a given test case""" test = test.copy() # Any Arguments fields declared as enums (a_type, b_type, etc.) enum_args = [decl[0] for decl in param['Arguments']._fields_ if decl[1].__module__ == '__main__'] try: setdefaults(test) # For enum arguments, replace name with value for typename in enum_args: if test[typename] in datatypes: test[typename] = datatypes[test[typename]] known_bug_platforms = set() # Match known bugs if test['category'] not in ('known_bug', 'disabled'): for bug in param['known_bugs']: for key, value in bug.items(): if key == 'known_bug_platforms' or key == 'category': continue if key not in test: break if key == 'function': if not fnmatchcase(test[key], value): break # For keys declared as enums, compare resulting values elif test[key] != (datatypes.get(value, value) if key in enum_args else value): break else: # All values specified in known bug match the test case platforms = bug.get('known_bug_platforms', '') # If at least one known_bug_platforms is specified, add # each platform in platforms to known_bug_platforms set if platforms.strip(' :,\f\n\r\t\v'): known_bug_platforms |= set(re.split('[ :,\f\n\r\t\v]+', platforms)) else: test['category'] = 'known_bug' break # Unless category is already set to known_bug or disabled, set # known_bug_platforms to a space-separated list of platforms test['known_bug_platforms'] = ' ' . join(known_bug_platforms) if test[ 'category'] not in ('known_bug', 'disabled') else '' write_test(test) except KeyError as err: sys.exit("Undefined value " + str(err) + "\n" + str(test)) def generate(test, function): """Generate test combinations by iterating across lists recursively""" test = test.copy() # For specially named lists, they are expanded and merged into the test # argument list. When the list name is a dictionary of length 1, its pairs # indicate that the argument named by its key takes on values paired with # the argument named by its value, which is another dictionary list. We # process the value dictionaries' keys in alphabetic order, to ensure # deterministic test ordering. 
for argname in param['dict_lists_to_expand']: if type(argname) == dict: if len(argname) == 1: arg, target = list(argname.items())[0] if arg in test and type(test[arg]) == dict: pairs = sorted(list(test[arg].items()), key=lambda x: x[0]) for test[arg], test[target] in pairs: generate(test, function) return elif argname in test and type(test[argname]) in (tuple, list, dict): # Pop the list and iterate across it ilist = test.pop(argname) # For a bare dictionary, wrap it in a list and apply it once for item in [ilist] if type(ilist) == dict else ilist: try: case = test.copy() case.update(item) # original test merged with each item generate(case, function) except TypeError as err: sys.exit("TypeError: " + str(err) + " for " + argname + ", which has type " + str(type(item)) + "\nA name listed in \"Dictionary lists to " "expand\" must be a defined as a dictionary.\n") return for key in sorted(list(test)): # Integer arguments which are ranges (A..B[..C]) are expanded if type(test[key]) == str: match = INT_RANGE_RE.match(str(test[key])) if match: for test[key] in range(int(match.group(1)), int(match.group(2))+1, int(match.group(3) or 1)): generate(test, function) return # For sequence arguments, they are expanded into scalars elif (type(test[key]) in (tuple, list) and key not in param['lists_to_not_expand']): for test[key] in test[key]: generate(test, function) return # Replace typed function names with generic functions and types if 'hipblas_function' in test: func = test.pop('hipblas_function') if func in param['Functions']: test.update(param['Functions'][func]) else: test['function'] = func.rpartition('hipblas_')[2] generate(test, function) return function(test) if __name__ == '__main__': main() hipBLAS-rocm-5.5.1/clients/common/hipblas_parse_data.cpp000066400000000000000000000075661434647641600232010ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
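// --- Editor's note: hedged illustration, not part of the hipBLAS sources. ---
// write_signature()/write_test() in hipblas_gentest.py above frame the binary test
// stream with an 8-byte "hipBLAS" header and an 8-byte "HIPblas" trailer, which the
// C++ side (Arguments::validate and operator>>) re-checks before trusting any test
// records. The sketch below shows that framing round trip with a dummy Record struct;
// the real stream carries the full Arguments layout plus a rolling per-byte signature
// pattern so any field-layout or endianness mismatch is detected.

#include <cstring>
#include <iostream>
#include <sstream>

// Dummy fixed-size record standing in for the real Arguments struct.
struct Record
{
    int    n;
    double alpha;
};

// Write: 8-byte header, raw record bytes, 8-byte trailer.
void write_framed(std::ostream& os, const Record& r)
{
    const char header[8]  = "hipBLAS"; // 7 characters + terminating NUL
    const char trailer[8] = "HIPblas";
    os.write(header, sizeof(header));
    os.write(reinterpret_cast<const char*>(&r), sizeof(r));
    os.write(trailer, sizeof(trailer));
}

// Read: verify the framing matches before trusting the payload.
bool read_framed(std::istream& is, Record& r)
{
    char header[8]{}, trailer[8]{};
    is.read(header, sizeof(header));
    is.read(reinterpret_cast<char*>(&r), sizeof(r));
    is.read(trailer, sizeof(trailer));
    return is && !strcmp(header, "hipBLAS") && !strcmp(trailer, "HIPblas");
}

int main()
{
    std::stringstream buf(std::ios::in | std::ios::out | std::ios::binary);
    write_framed(buf, Record{42, 2.0});

    Record r{};
    if(read_framed(buf, r))
        std::cout << "ok: n=" << r.n << " alpha=" << r.alpha << '\n';
    else
        std::cout << "binary test data does not match the expected format\n";
    return 0;
}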
* * ************************************************************************ */ #include "hipblas_parse_data.hpp" #include "hipblas_data.hpp" #include "utility.h" #include #include #include #include #include #include #include // Parse YAML data static std::string hipblas_parse_yaml(const std::string& yaml) { std::string tmp = hipblas_tempname(); auto exepath = hipblas_exepath(); auto cmd = exepath + "hipblas_gentest.py --template " + exepath + "hipblas_template.yaml -o " + tmp + " " + yaml; std::cerr << cmd << std::endl; #ifdef WIN32 int status = std::system(cmd.c_str()); if(status == -1) exit(EXIT_FAILURE); #else int status = system(cmd.c_str()); if(status == -1 || !WIFEXITED(status) || WEXITSTATUS(status)) exit(EXIT_FAILURE); #endif return tmp; } // Parse --data and --yaml command-line arguments bool hipblas_parse_data(int& argc, char** argv, const std::string& default_file) { std::string filename; char** argv_p = argv + 1; bool help = false, yaml = false; // Scan, process and remove any --yaml or --data options for(int i = 1; argv[i]; ++i) { if(!strcmp(argv[i], "--data") || !strcmp(argv[i], "--yaml")) { if(!strcmp(argv[i], "--yaml")) { yaml = true; } if(filename != "") { std::cerr << "Only one of the --yaml and --data options may be specified" << std::endl; exit(EXIT_FAILURE); } if(!argv[i + 1] || !argv[i + 1][0]) { std::cerr << "The " << argv[i] << " option requires an argument" << std::endl; exit(EXIT_FAILURE); } filename = argv[++i]; } else { *argv_p++ = argv[i]; if(!help && (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help"))) { help = true; std::cout << "\n" << argv[0] << " [ --data | --yaml ] ...\n" << std::endl; } } } // argc and argv contain remaining options and non-option arguments *argv_p = nullptr; argc = argv_p - argv; if(filename == "-") filename = "/dev/stdin"; else if(filename == "") filename = default_file; if(yaml) filename = hipblas_parse_yaml(filename); if(filename != "") { HipBLAS_TestData::set_filename(filename, yaml); return true; } return false; } hipBLAS-rocm-5.5.1/clients/common/hipblas_template_specialization.cpp000066400000000000000000043635471434647641600260170ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
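// --- Editor's note: hedged illustration, not part of the hipBLAS sources. ---
// hipblas_parse_data() above scans argv for --data/--yaml, insists that the option
// has an argument and is not given twice, and compacts the remaining arguments back
// into argv for normal option processing. The sketch below shows the same
// scan-and-compact idiom for a single hypothetical --config option; it is a
// simplification for illustration, not the client's actual parser.

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

// Extract "--config <file>" from argv, removing it from the argument list.
// Returns the file name (empty if the option was not given) and updates argc.
std::string extract_config(int& argc, char** argv)
{
    std::string filename;
    char**      argv_p = argv + 1;

    for(int i = 1; i < argc && argv[i]; ++i)
    {
        if(!strcmp(argv[i], "--config"))
        {
            if(!argv[i + 1] || !argv[i + 1][0])
            {
                std::cerr << "--config requires an argument" << std::endl;
                std::exit(EXIT_FAILURE);
            }
            filename = argv[++i]; // consume the option's argument
        }
        else
        {
            *argv_p++ = argv[i]; // keep everything else, shifted left
        }
    }

    *argv_p = nullptr;       // argv stays null-terminated
    argc    = argv_p - argv; // argc now counts only the remaining arguments
    return filename;
}

int main(int argc, char** argv)
{
    std::string cfg = extract_config(argc, argv);
    std::cout << "config file: " << (cfg.empty() ? "<none>" : cfg) << '\n';
    std::cout << "remaining argc: " << argc << '\n';
    return 0;
}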
* * * ************************************************************************/ #include "hipblas.h" #include "hipblas.hpp" #ifndef WIN32 #include "hipblas_fortran.hpp" #else #include "hipblas_no_fortran.hpp" #endif #include /*!\file * \brief provide template functions interfaces to ROCBLAS C89 interfaces */ /* * =========================================================================== * level 1 BLAS * =========================================================================== */ // axpy template <> hipblasStatus_t hipblasAxpy(hipblasHandle_t handle, int n, const hipblasHalf* alpha, const hipblasHalf* x, int incx, hipblasHalf* y, int incy) { return hipblasHaxpy(handle, n, alpha, x, incx, y, incy); } template <> hipblasStatus_t hipblasAxpy( hipblasHandle_t handle, int n, const float* alpha, const float* x, int incx, float* y, int incy) { return hipblasSaxpy(handle, n, alpha, x, incx, y, incy); } template <> hipblasStatus_t hipblasAxpy(hipblasHandle_t handle, int n, const double* alpha, const double* x, int incx, double* y, int incy) { return hipblasDaxpy(handle, n, alpha, x, incx, y, incy); } template <> hipblasStatus_t hipblasAxpy(hipblasHandle_t handle, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* y, int incy) { return hipblasCaxpy(handle, n, alpha, x, incx, y, incy); } template <> hipblasStatus_t hipblasAxpy(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { return hipblasZaxpy(handle, n, alpha, x, incx, y, incy); } // axpy_batched template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const hipblasHalf* alpha, const hipblasHalf* const x[], int incx, hipblasHalf* const y[], int incy, int batch_count) { return hipblasHaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const float* alpha, const float* const x[], int incx, float* const y[], int incy, int batch_count) { return hipblasSaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const double* alpha, const double* const x[], int incx, double* const y[], int incy, int batch_count) { return hipblasDaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, int batch_count) { return hipblasCaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count); } // axpy_strided_batched template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const hipblasHalf* alpha, const hipblasHalf* x, int incx, hipblasStride stridex, hipblasHalf* y, int incy, hipblasStride stridey, int batch_count) { return hipblasHaxpyStridedBatched( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, int batch_count) 
{ return hipblasSaxpyStridedBatched( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, int batch_count) { return hipblasDaxpyStridedBatched( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasCaxpyStridedBatched( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasZaxpyStridedBatched( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } // scal template <> hipblasStatus_t hipblasScal(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx) { return hipblasSscal(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal(hipblasHandle_t handle, int n, const double* alpha, double* x, int incx) { return hipblasDscal(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal( hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx) { return hipblasCscal(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal( hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx) { return hipblasCsscal(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx) { return hipblasZscal(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal( hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx) { return hipblasZdscal(handle, n, alpha, x, incx); } // scal_batched template <> hipblasStatus_t hipblasScalBatched( hipblasHandle_t handle, int n, const float* alpha, float* const x[], int incx, int batch_count) { return hipblasSscalBatched(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const double* alpha, double* const x[], int incx, int batch_count) { return hipblasDscalBatched(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCscalBatched(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZscalBatched(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCsscalBatched(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* const x[], int incx, int batch_count) { return 
hipblasZdscalBatched(handle, n, alpha, x, incx, batch_count); } // scal_strided_batched template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx, hipblasStride stridex, int batch_count) { return hipblasSscalStridedBatched(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const double* alpha, double* x, int incx, hipblasStride stridex, int batch_count) { return hipblasDscalStridedBatched(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasCscalStridedBatched(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasZscalStridedBatched(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasCsscalStridedBatched(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasZdscalStridedBatched(handle, n, alpha, x, incx, stridex, batch_count); } //swap template <> hipblasStatus_t hipblasSwap(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy) { return hipblasSswap(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasSwap(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy) { return hipblasDswap(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasSwap( hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy) { return hipblasCswap(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasSwap(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { return hipblasZswap(handle, n, x, incx, y, incy); } // swap_batched template <> hipblasStatus_t hipblasSwapBatched( hipblasHandle_t handle, int n, float* x[], int incx, float* y[], int incy, int batch_count) { return hipblasSswapBatched(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasSwapBatched( hipblasHandle_t handle, int n, double* x[], int incx, double* y[], int incy, int batch_count) { return hipblasDswapBatched(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasSwapBatched(hipblasHandle_t handle, int n, hipblasComplex* x[], int incx, hipblasComplex* y[], int incy, int batch_count) { return hipblasCswapBatched(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasSwapBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* x[], int incx, hipblasDoubleComplex* y[], int incy, int batch_count) { return hipblasZswapBatched(handle, n, x, incx, y, incy, batch_count); } // swap_strided_batched template <> hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, int batch_count) { return 
hipblasSswapStridedBatched(handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, int batch_count) { return hipblasDswapStridedBatched(handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasCswapStridedBatched(handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasZswapStridedBatched(handle, n, x, incx, stridex, y, incy, stridey, batch_count); } // copy template <> hipblasStatus_t hipblasCopy(hipblasHandle_t handle, int n, const float* x, int incx, float* y, int incy) { return hipblasScopy(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasCopy( hipblasHandle_t handle, int n, const double* x, int incx, double* y, int incy) { return hipblasDcopy(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasCopy( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasComplex* y, int incy) { return hipblasCcopy(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasCopy(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { return hipblasZcopy(handle, n, x, incx, y, incy); } // copy_batched template <> hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const float* const x[], int incx, float* const y[], int incy, int batch_count) { return hipblasScopyBatched(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const double* const x[], int incx, double* const y[], int incy, int batch_count) { return hipblasDcopyBatched(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, int batch_count) { return hipblasCcopyBatched(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZcopyBatched(handle, n, x, incx, y, incy, batch_count); } // copy_strided_batched template <> hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, int batch_count) { return hipblasScopyStridedBatched(handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, int batch_count) { return hipblasDcopyStridedBatched(handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, int 
batch_count) { return hipblasCcopyStridedBatched(handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasZcopyStridedBatched(handle, n, x, incx, stridex, y, incy, stridey, batch_count); } // dot template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const hipblasHalf* x, int incx, const hipblasHalf* y, int incy, hipblasHalf* result) { return hipblasHdot(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const hipblasBfloat16* x, int incx, const hipblasBfloat16* y, int incy, hipblasBfloat16* result) { return hipblasBfdot(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result) { return hipblasSdot(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result) { return hipblasDdot(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result) { return hipblasCdotu(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result) { return hipblasZdotu(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDotc(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result) { return hipblasCdotc(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDotc(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result) { return hipblasZdotc(handle, n, x, incx, y, incy, result); } // dot_batched template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const hipblasHalf* const x[], int incx, const hipblasHalf* const y[], int incy, int batch_count, hipblasHalf* result) { return hipblasHdotBatched(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const hipblasBfloat16* const x[], int incx, const hipblasBfloat16* const y[], int incy, int batch_count, hipblasBfloat16* result) { return hipblasBfdotBatched(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const float* const x[], int incx, const float* const y[], int incy, int batch_count, float* result) { return hipblasSdotBatched(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const double* const x[], int incx, const double* const y[], int incy, int batch_count, double* result) { return hipblasDdotBatched(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, int batch_count, hipblasComplex* result) { return 
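// For complex types the unsuffixed hipblasDot/hipblasDotBatched wrappers map to the unconjugated
// dotu variants (cdotu/zdotu), while hipblasDotc/hipblasDotcBatched map to the conjugated dotc
// variants; real, half, and bfloat16 types have a single dot product, so only the unsuffixed
// wrapper exists for them.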
hipblasCdotuBatched(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotcBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, int batch_count, hipblasComplex* result) { return hipblasCdotcBatched(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, int batch_count, hipblasDoubleComplex* result) { return hipblasZdotuBatched(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotcBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, int batch_count, hipblasDoubleComplex* result) { return hipblasZdotcBatched(handle, n, x, incx, y, incy, batch_count, result); } // dot_strided_batched template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const hipblasHalf* x, int incx, hipblasStride stridex, const hipblasHalf* y, int incy, hipblasStride stridey, int batch_count, hipblasHalf* result) { return hipblasHdotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const hipblasBfloat16* x, int incx, hipblasStride stridex, const hipblasBfloat16* y, int incy, hipblasStride stridey, int batch_count, hipblasBfloat16* result) { return hipblasBfdotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, int batch_count, float* result) { return hipblasSdotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, int batch_count, double* result) { return hipblasDdotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasComplex* result) { return hipblasCdotuStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotcStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasComplex* result) { return hipblasCdotcStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasDoubleComplex* result) { return hipblasZdotuStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotcStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const 
hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasDoubleComplex* result) { return hipblasZdotcStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } // asum template <> hipblasStatus_t hipblasAsum( hipblasHandle_t handle, int n, const float* x, int incx, float* result) { return hipblasSasum(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasAsum( hipblasHandle_t handle, int n, const double* x, int incx, double* result) { return hipblasDasum(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasAsum( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result) { return hipblasScasum(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasAsum( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result) { return hipblasDzasum(handle, n, x, incx, result); } // asum_batched template <> hipblasStatus_t hipblasAsumBatched( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, float* result) { return hipblasSasumBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasAsumBatched(hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, double* result) { return hipblasDasumBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasAsumBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, float* result) { return hipblasScasumBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasAsumBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, double* result) { return hipblasDzasumBatched(handle, n, x, incx, batch_count, result); } // asum_strided_batched template <> hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, float* result) { return hipblasSasumStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, double* result) { return hipblasDasumStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, float* result) { return hipblasScasumStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, double* result) { return hipblasDzasumStridedBatched(handle, n, x, incx, stridex, batch_count, result); } // nrm2 template <> hipblasStatus_t hipblasNrm2( hipblasHandle_t handle, int n, const float* x, int incx, float* result) { return hipblasSnrm2(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasNrm2( hipblasHandle_t handle, int n, const double* x, int incx, double* result) { return hipblasDnrm2(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasNrm2( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result) { return hipblasScnrm2(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasNrm2( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result) { 
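// As with asum and the other reduction wrappers, the scalar result is written to 'result';
// whether 'result' refers to host or device memory follows the handle's pointer mode
// (HIPBLAS_POINTER_MODE_HOST vs HIPBLAS_POINTER_MODE_DEVICE), and the batched/strided-batched
// forms write one value per batch into a result array of length batch_count.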
return hipblasDznrm2(handle, n, x, incx, result); } // nrm2_batched template <> hipblasStatus_t hipblasNrm2Batched( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, float* result) { return hipblasSnrm2Batched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasNrm2Batched(hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, double* result) { return hipblasDnrm2Batched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasNrm2Batched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, float* result) { return hipblasScnrm2Batched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasNrm2Batched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, double* result) { return hipblasDznrm2Batched(handle, n, x, incx, batch_count, result); } // nrm2_strided_batched template <> hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, float* result) { return hipblasSnrm2StridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, double* result) { return hipblasDnrm2StridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, float* result) { return hipblasScnrm2StridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, double* result) { return hipblasDznrm2StridedBatched(handle, n, x, incx, stridex, batch_count, result); } // rot template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* c, const float* s) { return hipblasSrot(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* c, const double* s) { return hipblasDrot(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, const float* c, const hipblasComplex* s) { return hipblasCrot(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, const float* c, const float* s) { return hipblasCsrot(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, const double* c, const hipblasDoubleComplex* s) { return hipblasZrot(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, const double* c, const double* s) { return hipblasZdrot(handle, n, x, incx, y, incy, c, s); } // rot_batched template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, float* const x[], int incx, float* const y[], int incy, const float* c, const 
float* s, int batch_count) { return hipblasSrotBatched(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, double* const x[], int incx, double* const y[], int incy, const double* c, const double* s, int batch_count) { return hipblasDrotBatched(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, const float* c, const hipblasComplex* s, int batch_count) { return hipblasCrotBatched(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, const float* c, const float* s, int batch_count) { return hipblasCsrotBatched(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, const double* c, const hipblasDoubleComplex* s, int batch_count) { return hipblasZrotBatched(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, const double* c, const double* s, int batch_count) { return hipblasZdrotBatched(handle, n, x, incx, y, incy, c, s, batch_count); } // rot_strided_batched template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, const float* c, const float* s, int batch_count) { return hipblasSrotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, const double* c, const double* s, int batch_count) { return hipblasDrotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, const float* c, const hipblasComplex* s, int batch_count) { return hipblasCrotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, const float* c, const float* s, int batch_count) { return hipblasCsrotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, const double* c, const hipblasDoubleComplex* s, int batch_count) { return hipblasZrotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, const double* c, const double* s, int batch_count) { return 
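// The rot wrappers keep the cosine 'c' real for every precision; the specialization is
// disambiguated by the type of 's', which selects the real-s forms (srot/drot/csrot/zdrot)
// versus the complex-s forms (crot/zrot).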
hipblasZdrotStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } // rotg template <> hipblasStatus_t hipblasRotg(hipblasHandle_t handle, float* a, float* b, float* c, float* s) { return hipblasSrotg(handle, a, b, c, s); } template <> hipblasStatus_t hipblasRotg(hipblasHandle_t handle, double* a, double* b, double* c, double* s) { return hipblasDrotg(handle, a, b, c, s); } template <> hipblasStatus_t hipblasRotg( hipblasHandle_t handle, hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s) { return hipblasCrotg(handle, a, b, c, s); } template <> hipblasStatus_t hipblasRotg(hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s) { return hipblasZrotg(handle, a, b, c, s); } // rotg_batched template <> hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, float* const a[], float* const b[], float* const c[], float* const s[], int batch_count) { return hipblasSrotgBatched(handle, a, b, c, s, batch_count); } template <> hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, double* const a[], double* const b[], double* const c[], double* const s[], int batch_count) { return hipblasDrotgBatched(handle, a, b, c, s, batch_count); } template <> hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, hipblasComplex* const a[], hipblasComplex* const b[], float* const c[], hipblasComplex* const s[], int batch_count) { return hipblasCrotgBatched(handle, a, b, c, s, batch_count); } template <> hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, hipblasDoubleComplex* const a[], hipblasDoubleComplex* const b[], double* const c[], hipblasDoubleComplex* const s[], int batch_count) { return hipblasZrotgBatched(handle, a, b, c, s, batch_count); } // rotg_strided_batched template <> hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, float* a, hipblasStride stridea, float* b, hipblasStride strideb, float* c, hipblasStride stridec, float* s, hipblasStride strides, int batch_count) { return hipblasSrotgStridedBatched( handle, a, stridea, b, strideb, c, stridec, s, strides, batch_count); } template <> hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, double* a, hipblasStride stridea, double* b, hipblasStride strideb, double* c, hipblasStride stridec, double* s, hipblasStride strides, int batch_count) { return hipblasDrotgStridedBatched( handle, a, stridea, b, strideb, c, stridec, s, strides, batch_count); } template <> hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, hipblasComplex* a, hipblasStride stridea, hipblasComplex* b, hipblasStride strideb, float* c, hipblasStride stridec, hipblasComplex* s, hipblasStride strides, int batch_count) { return hipblasCrotgStridedBatched( handle, a, stridea, b, strideb, c, stridec, s, strides, batch_count); } template <> hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasStride stridea, hipblasDoubleComplex* b, hipblasStride strideb, double* c, hipblasStride stridec, hipblasDoubleComplex* s, hipblasStride strides, int batch_count) { return hipblasZrotgStridedBatched( handle, a, stridea, b, strideb, c, stridec, s, strides, batch_count); } // rotm template <> hipblasStatus_t hipblasRotm( hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param) { return hipblasSrotm(handle, n, x, incx, y, incy, param); } template <> hipblasStatus_t hipblasRotm( hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param) { 
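// rotm/rotmg follow the reference BLAS convention: 'param' is a 5-element array whose first
// entry is the flag (-2, -1, 0 or 1) selecting the form of the modified-Givens matrix H, with
// the remaining entries holding h11, h21, h12, h22.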
return hipblasDrotm(handle, n, x, incx, y, incy, param); } // rotm_batched template <> hipblasStatus_t hipblasRotmBatched(hipblasHandle_t handle, int n, float* const x[], int incx, float* const y[], int incy, const float* const param[], int batch_count) { return hipblasSrotmBatched(handle, n, x, incx, y, incy, param, batch_count); } template <> hipblasStatus_t hipblasRotmBatched(hipblasHandle_t handle, int n, double* const x[], int incx, double* const y[], int incy, const double* const param[], int batch_count) { return hipblasDrotmBatched(handle, n, x, incx, y, incy, param, batch_count); } // rotm_strided_batched template <> hipblasStatus_t hipblasRotmStridedBatched(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, const float* param, hipblasStride strideparam, int batch_count) { return hipblasSrotmStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, param, strideparam, batch_count); } template <> hipblasStatus_t hipblasRotmStridedBatched(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, const double* param, hipblasStride strideparam, int batch_count) { return hipblasDrotmStridedBatched( handle, n, x, incx, stridex, y, incy, stridey, param, strideparam, batch_count); } // rotmg template <> hipblasStatus_t hipblasRotmg( hipblasHandle_t handle, float* d1, float* d2, float* x1, const float* y1, float* param) { return hipblasSrotmg(handle, d1, d2, x1, y1, param); } template <> hipblasStatus_t hipblasRotmg( hipblasHandle_t handle, double* d1, double* d2, double* x1, const double* y1, double* param) { return hipblasDrotmg(handle, d1, d2, x1, y1, param); } // rotmg_batched template <> hipblasStatus_t hipblasRotmgBatched(hipblasHandle_t handle, float* const d1[], float* const d2[], float* const x1[], const float* const y1[], float* const param[], int batch_count) { return hipblasSrotmgBatched(handle, d1, d2, x1, y1, param, batch_count); } template <> hipblasStatus_t hipblasRotmgBatched(hipblasHandle_t handle, double* const d1[], double* const d2[], double* const x1[], const double* const y1[], double* const param[], int batch_count) { return hipblasDrotmgBatched(handle, d1, d2, x1, y1, param, batch_count); } // rotmg_strided_batched template <> hipblasStatus_t hipblasRotmgStridedBatched(hipblasHandle_t handle, float* d1, hipblasStride stride_d1, float* d2, hipblasStride stride_d2, float* x1, hipblasStride stride_x1, const float* y1, hipblasStride stride_y1, float* param, hipblasStride strideparam, int batch_count) { return hipblasSrotmgStridedBatched(handle, d1, stride_d1, d2, stride_d2, x1, stride_x1, y1, stride_y1, param, strideparam, batch_count); } template <> hipblasStatus_t hipblasRotmgStridedBatched(hipblasHandle_t handle, double* d1, hipblasStride stride_d1, double* d2, hipblasStride stride_d2, double* x1, hipblasStride stride_x1, const double* y1, hipblasStride stride_y1, double* param, hipblasStride strideparam, int batch_count) { return hipblasDrotmgStridedBatched(handle, d1, stride_d1, d2, stride_d2, x1, stride_x1, y1, stride_y1, param, strideparam, batch_count); } // amax template <> hipblasStatus_t hipblasIamax(hipblasHandle_t handle, int n, const float* x, int incx, int* result) { return hipblasIsamax(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamax(hipblasHandle_t handle, int n, const double* x, int incx, int* result) { return hipblasIdamax(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamax( 
hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result) { return hipblasIcamax(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamax( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result) { return hipblasIzamax(handle, n, x, incx, result); } // amax_batched template <> hipblasStatus_t hipblasIamaxBatched( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, int* result) { return hipblasIsamaxBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIamaxBatched( hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, int* result) { return hipblasIdamaxBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIamaxBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, int* result) { return hipblasIcamaxBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIamaxBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, int* result) { return hipblasIzamaxBatched(handle, n, x, incx, batch_count, result); } // amax_strided_batched template <> hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIsamaxStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIdamaxStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIcamaxStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIzamaxStridedBatched(handle, n, x, incx, stridex, batch_count, result); } // amin template <> hipblasStatus_t hipblasIamin(hipblasHandle_t handle, int n, const float* x, int incx, int* result) { return hipblasIsamin(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamin(hipblasHandle_t handle, int n, const double* x, int incx, int* result) { return hipblasIdamin(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamin( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result) { return hipblasIcamin(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamin( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result) { return hipblasIzamin(handle, n, x, incx, result); } // amin_batched template <> hipblasStatus_t hipblasIaminBatched( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, int* result) { return hipblasIsaminBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIaminBatched( hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, int* result) { return hipblasIdaminBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIaminBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], 
int incx, int batch_count, int* result) { return hipblasIcaminBatched(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIaminBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, int* result) { return hipblasIzaminBatched(handle, n, x, incx, batch_count, result); } // amin_strided_batched template <> hipblasStatus_t hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIsaminStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIdaminStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIcaminStridedBatched(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIzaminStridedBatched(handle, n, x, incx, stridex, batch_count, result); } /* * =========================================================================== * level 2 BLAS * =========================================================================== */ // gbmv template <> hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSgbmv(handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDgbmv(handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasCgbmv(handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZgbmv(handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } // gbmv_batched template <> hipblasStatus_t hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* const y[], int incy, int batch_count) { return hipblasSgbmvBatched( handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, 
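// (kl and ku below are the sub- and super-diagonal counts of the banded matrix A, stored in band form with lda >= kl + ku + 1)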
int ku, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* const y[], int incy, int batch_count) { return hipblasDgbmvBatched( handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count) { return hipblasCgbmvBatched( handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZgbmvBatched( handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy, batch_count); } // gbmv_strided_batched template <> hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, hipblasStride stride_a, const float* x, int incx, hipblasStride stride_x, const float* beta, float* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasSgbmvStridedBatched(handle, transA, m, n, kl, ku, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } template <> hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, hipblasStride stride_a, const double* x, int incx, hipblasStride stride_x, const double* beta, double* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasDgbmvStridedBatched(handle, transA, m, n, kl, ku, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } template <> hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride stride_a, const hipblasComplex* x, int incx, hipblasStride stride_x, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasCgbmvStridedBatched(handle, transA, m, n, kl, ku, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } template <> hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasZgbmvStridedBatched(handle, transA, m, n, kl, ku, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } // gemv template <> hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSgemv(handle, transA, m, n, alpha, A, 
lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDgemv(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasCgemv(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZgemv(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } // gemv_batched template <> hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* const y[], int incy, int batch_count) { return hipblasSgemvBatched( handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* const y[], int incy, int batch_count) { return hipblasDgemvBatched( handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count) { return hipblasCgemvBatched( handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZgemvBatched( handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } // gemv_strided_batched template <> hipblasStatus_t hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batch_count) { return hipblasSgemvStridedBatched(handle, transA, m, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batch_count) { return hipblasDgemvStridedBatched(handle, transA, m, n, alpha, A, lda, 
strideA, x, incx, stridex, beta, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasCgemvStridedBatched(handle, transA, m, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasZgemvStridedBatched(handle, transA, m, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batch_count); } // ger template <> hipblasStatus_t hipblasGer(hipblasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda) { return hipblasSger(handle, m, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasGer(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda) { return hipblasDger(handle, m, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasGer(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCgeru(handle, m, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasGer(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCgerc(handle, m, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasGer(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZgeru(handle, m, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasGer(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZgerc(handle, m, n, alpha, x, incx, y, incy, A, lda); } // ger_batched template <> hipblasStatus_t hipblasGerBatched(hipblasHandle_t handle, int m, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const A[], int lda, int batch_count) { return hipblasSgerBatched(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); } template <> hipblasStatus_t hipblasGerBatched(hipblasHandle_t handle, int m, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const A[], int lda, int batch_count) { return hipblasDgerBatched(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); } template <> hipblasStatus_t hipblasGerBatched(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const 
y[], int incy, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCgeruBatched(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); } template <> hipblasStatus_t hipblasGerBatched(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCgercBatched(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); } template <> hipblasStatus_t hipblasGerBatched(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZgeruBatched(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); } template <> hipblasStatus_t hipblasGerBatched(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZgercBatched(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); } // ger_strided_batched template <> hipblasStatus_t hipblasGerStridedBatched(hipblasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* A, int lda, hipblasStride strideA, int batch_count) { return hipblasSgerStridedBatched( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasGerStridedBatched(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* A, int lda, hipblasStride strideA, int batch_count) { return hipblasDgerStridedBatched( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasGerStridedBatched(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCgeruStridedBatched( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasGerStridedBatched(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCgercStridedBatched( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasGerStridedBatched(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZgeruStridedBatched( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasGerStridedBatched(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride 
stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZgercStridedBatched( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); } // hbmv template <> hipblasStatus_t hipblasHbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasChbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasHbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZhbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } // hbmv_batched template <> hipblasStatus_t hipblasHbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batchCount) { return hipblasChbmvBatched( handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasHbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount) { return hipblasZhbmvBatched( handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy, batchCount); } // hbmv_strided_batched template <> hipblasStatus_t hipblasHbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasChbmvStridedBatched(handle, uplo, n, k, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasHbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasZhbmvStridedBatched(handle, uplo, n, k, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } // hemv template <> hipblasStatus_t hipblasHemv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasChemv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasHemv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZhemv(handle, uplo, n, 
alpha, A, lda, x, incx, beta, y, incy); } // hemv_batched template <> hipblasStatus_t hipblasHemvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count) { return hipblasChemvBatched(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasHemvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZhemvBatched(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } // hemv_strided_batched template <> hipblasStatus_t hipblasHemvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride stride_a, const hipblasComplex* x, int incx, hipblasStride stride_x, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasChemvStridedBatched(handle, uplo, n, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } template <> hipblasStatus_t hipblasHemvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasZhemvStridedBatched(handle, uplo, n, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } // her template <> hipblasStatus_t hipblasHer(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasComplex* A, int lda) { return hipblasCher(handle, uplo, n, alpha, x, incx, A, lda); } template <> hipblasStatus_t hipblasHer(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda) { return hipblasZher(handle, uplo, n, alpha, x, incx, A, lda); } // her_batched template <> hipblasStatus_t hipblasHerBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const A[], int lda, int batchCount) { return hipblasCherBatched(handle, uplo, n, alpha, x, incx, A, lda, batchCount); } template <> hipblasStatus_t hipblasHerBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const A[], int lda, int batchCount) { return hipblasZherBatched(handle, uplo, n, alpha, x, incx, A, lda, batchCount); } // her_strided_batched template <> hipblasStatus_t hipblasHerStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasCherStridedBatched( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasHerStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const 
double* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasZherStridedBatched( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batchCount); } // her2 template <> hipblasStatus_t hipblasHer2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCher2(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasHer2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZher2(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } // her2_batched template <> hipblasStatus_t hipblasHer2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batchCount) { return hipblasCher2Batched(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } template <> hipblasStatus_t hipblasHer2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batchCount) { return hipblasZher2Batched(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } // her2_strided_batched template <> hipblasStatus_t hipblasHer2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasCher2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasHer2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasZher2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } // hpmv template <> hipblasStatus_t hipblasHpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasChpmv(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasHpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZhpmv(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); } // hpmv_batched template <> hipblasStatus_t hipblasHpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const AP[], const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const 
y[], int incy, int batchCount) { return hipblasChpmvBatched(handle, uplo, n, alpha, AP, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasHpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount) { return hipblasZhpmvBatched(handle, uplo, n, alpha, AP, x, incx, beta, y, incy, batchCount); } // hpmv_strided_batched template <> hipblasStatus_t hipblasHpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, hipblasStride strideAP, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasChpmvStridedBatched( handle, uplo, n, alpha, AP, strideAP, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasHpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, hipblasStride strideAP, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasZhpmvStridedBatched( handle, uplo, n, alpha, AP, strideAP, x, incx, stridex, beta, y, incy, stridey, batchCount); } // hpr template <> hipblasStatus_t hipblasHpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP) { return hipblasChpr(handle, uplo, n, alpha, x, incx, AP); } template <> hipblasStatus_t hipblasHpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP) { return hipblasZhpr(handle, uplo, n, alpha, x, incx, AP); } // hpr_batched template <> hipblasStatus_t hipblasHprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const AP[], int batchCount) { return hipblasChprBatched(handle, uplo, n, alpha, x, incx, AP, batchCount); } template <> hipblasStatus_t hipblasHprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const AP[], int batchCount) { return hipblasZhprBatched(handle, uplo, n, alpha, x, incx, AP, batchCount); } // hpr_strided_batched template <> hipblasStatus_t hipblasHprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasChprStridedBatched( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasHprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasZhprStridedBatched( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } // hpr2 template <> hipblasStatus_t hipblasHpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const 
hipblasComplex* y, int incy, hipblasComplex* AP) { return hipblasChpr2(handle, uplo, n, alpha, x, incx, y, incy, AP); } template <> hipblasStatus_t hipblasHpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP) { return hipblasZhpr2(handle, uplo, n, alpha, x, incx, y, incy, AP); } // hpr2_batched template <> hipblasStatus_t hipblasHpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const AP[], int batchCount) { return hipblasChpr2Batched(handle, uplo, n, alpha, x, incx, y, incy, AP, batchCount); } template <> hipblasStatus_t hipblasHpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const AP[], int batchCount) { return hipblasZhpr2Batched(handle, uplo, n, alpha, x, incx, y, incy, AP, batchCount); } // hpr2_strided_batched template <> hipblasStatus_t hipblasHpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasChpr2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasHpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasZhpr2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, AP, strideAP, batchCount); } // sbmv template <> hipblasStatus_t hipblasSbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSsbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDsbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } // sbmv_batched template <> hipblasStatus_t hipblasSbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount) { return hipblasSsbmvBatched( handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* y[], int incy, int batchCount) { return hipblasDsbmvBatched( handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy, batchCount); } // sbmv_strided_batched template <> hipblasStatus_t hipblasSbmvStridedBatched(hipblasHandle_t handle, 
hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount) { return hipblasSsbmvStridedBatched(handle, uplo, n, k, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount) { return hipblasDsbmvStridedBatched(handle, uplo, n, k, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } // spmv template <> hipblasStatus_t hipblasSpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* AP, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSspmv(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* AP, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDspmv(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); } // spmv_batched template <> hipblasStatus_t hipblasSpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const AP[], const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount) { return hipblasSspmvBatched(handle, uplo, n, alpha, AP, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const AP[], const double* const x[], int incx, const double* beta, double* y[], int incy, int batchCount) { return hipblasDspmvBatched(handle, uplo, n, alpha, AP, x, incx, beta, y, incy, batchCount); } // spmv_strided_batched template <> hipblasStatus_t hipblasSpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* AP, hipblasStride strideAP, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount) { return hipblasSspmvStridedBatched( handle, uplo, n, alpha, AP, strideAP, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* AP, hipblasStride strideAP, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount) { return hipblasDspmvStridedBatched( handle, uplo, n, alpha, AP, strideAP, x, incx, stridex, beta, y, incy, stridey, batchCount); } // spr template <> hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* AP) { return hipblasSspr(handle, uplo, n, alpha, x, incx, AP); } template <> hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* AP) { return hipblasDspr(handle, uplo, n, alpha, x, incx, AP); } template <> hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, 
hipblasComplex* AP) { return hipblasCspr(handle, uplo, n, alpha, x, incx, AP); } template <> hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP) { return hipblasZspr(handle, uplo, n, alpha, x, incx, AP); } // spr_batched template <> hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, float* const AP[], int batchCount) { return hipblasSsprBatched(handle, uplo, n, alpha, x, incx, AP, batchCount); } template <> hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, double* const AP[], int batchCount) { return hipblasDsprBatched(handle, uplo, n, alpha, x, incx, AP, batchCount); } template <> hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const AP[], int batchCount) { return hipblasCsprBatched(handle, uplo, n, alpha, x, incx, AP, batchCount); } template <> hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const AP[], int batchCount) { return hipblasZsprBatched(handle, uplo, n, alpha, x, incx, AP, batchCount); } // spr_strided_batched template <> hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, float* AP, hipblasStride strideAP, int batchCount) { return hipblasSsprStridedBatched( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, double* AP, hipblasStride strideAP, int batchCount) { return hipblasDsprStridedBatched( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasCsprStridedBatched( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasZsprStridedBatched( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } // spr2 template <> hipblasStatus_t hipblasSpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* AP) { return hipblasSspr2(handle, uplo, n, alpha, x, incx, y, incy, AP); } template <> hipblasStatus_t hipblasSpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP) { return hipblasDspr2(handle, uplo, n, alpha, x, incx, y, incy, AP); } // spr2_batched template <> hipblasStatus_t hipblasSpr2Batched(hipblasHandle_t handle, 
hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const AP[], int batchCount) { return hipblasSspr2Batched(handle, uplo, n, alpha, x, incx, y, incy, AP, batchCount); } template <> hipblasStatus_t hipblasSpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const AP[], int batchCount) { return hipblasDspr2Batched(handle, uplo, n, alpha, x, incx, y, incy, AP, batchCount); } // spr2_strided_batched template <> hipblasStatus_t hipblasSpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* AP, hipblasStride strideAP, int batchCount) { return hipblasSspr2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasSpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* AP, hipblasStride strideAP, int batchCount) { return hipblasDspr2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, AP, strideAP, batchCount); } // symv template <> hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSsymv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDsymv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasCsymv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZsymv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } // symv_batched template <> hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount) { return hipblasSsymvBatched(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* y[], int incy, int batchCount) { return hipblasDsymvBatched(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], 
int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* y[], int incy, int batchCount) { return hipblasCsymvBatched(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y[], int incy, int batchCount) { return hipblasZsymvBatched(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batchCount); } // symv_strided_batched template <> hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount) { return hipblasSsymvStridedBatched(handle, uplo, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount) { return hipblasDsymvStridedBatched(handle, uplo, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasCsymvStridedBatched(handle, uplo, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasZsymvStridedBatched(handle, uplo, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } // syr template <> hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* A, int lda) { return hipblasSsyr(handle, uplo, n, alpha, x, incx, A, lda); } template <> hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* A, int lda) { return hipblasDsyr(handle, uplo, n, alpha, x, incx, A, lda); } template <> hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* A, int lda) { return hipblasCsyr(handle, uplo, n, alpha, x, incx, A, lda); } template <> hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda) { return hipblasZsyr(handle, uplo, n, alpha, x, incx, A, lda); } // syr_batched template <> 
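// Example (editorial sketch): the hipblasSyr specializations above map one call
// site onto Ssyr/Dsyr/Csyr/Zsyr. Assuming host pointer mode and hypothetical device
// buffers d_x (n elements, incx = 1) and d_A (leading dimension lda >= n), a single
// precision rank-1 update A := alpha*x*x**T + A could look like:
//
//     float alpha = 2.0f;
//     hipblasStatus_t st = hipblasSyr(handle, HIPBLAS_FILL_MODE_LOWER, n,
//                                     &alpha, d_x, 1, d_A, lda);
//
// st should be checked against HIPBLAS_STATUS_SUCCESS by the caller.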
hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, float* const A[], int lda, int batch_count) { return hipblasSsyrBatched(handle, uplo, n, alpha, x, incx, A, lda, batch_count); } template <> hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, double* const A[], int lda, int batch_count) { return hipblasDsyrBatched(handle, uplo, n, alpha, x, incx, A, lda, batch_count); } template <> hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCsyrBatched(handle, uplo, n, alpha, x, incx, A, lda, batch_count); } template <> hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZsyrBatched(handle, uplo, n, alpha, x, incx, A, lda, batch_count); } // syr_strided_batched template <> hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, float* A, int lda, hipblasStride strideA, int batch_count) { return hipblasSsyrStridedBatched( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, double* A, int lda, hipblasStride strideA, int batch_count) { return hipblasDsyrStridedBatched( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCsyrStridedBatched( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZsyrStridedBatched( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batch_count); } // syr2 template <> hipblasStatus_t hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda) { return hipblasSsyr2(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda) { return hipblasDsyr2(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCsyr2(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t 
hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZsyr2(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } // syr2_batched template <> hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const A[], int lda, int batchCount) { return hipblasSsyr2Batched(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } template <> hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const A[], int lda, int batchCount) { return hipblasDsyr2Batched(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } template <> hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batchCount) { return hipblasCsyr2Batched(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } template <> hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batchCount) { return hipblasZsyr2Batched(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } // syr2_strided_batched template <> hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* A, int lda, hipblasStride strideA, int batchCount) { return hipblasSsyr2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* A, int lda, hipblasStride strideA, int batchCount) { return hipblasDsyr2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasCsyr2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasZsyr2StridedBatched( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } // trsv template <> hipblasStatus_t 
hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, float* x, int incx) { return hipblasStrsv(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, double* x, int incx) { return hipblasDtrsv(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtrsv(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtrsv(handle, uplo, transA, diag, m, A, lda, x, incx); } // trsv_batched template <> hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const A[], int lda, float* const x[], int incx, int batch_count) { return hipblasStrsvBatched(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const A[], int lda, double* const x[], int incx, int batch_count) { return hipblasDtrsvBatched(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCtrsvBatched(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZtrsvBatched(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } // trsv_strided_batched template <> hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, hipblasStride strideA, float* x, int incx, hipblasStride stridex, int batch_count) { return hipblasStrsvStridedBatched( handle, uplo, transA, diag, m, A, lda, strideA, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, hipblasStride strideA, double* x, int incx, hipblasStride stridex, int batch_count) { return hipblasDtrsvStridedBatched( handle, uplo, transA, diag, m, A, lda, strideA, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* x, int incx, hipblasStride stridex, int batch_count) { return 
hipblasCtrsvStridedBatched( handle, uplo, transA, diag, m, A, lda, strideA, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasZtrsvStridedBatched( handle, uplo, transA, diag, m, A, lda, strideA, x, incx, stridex, batch_count); } /* * =========================================================================== * level 3 BLAS * =========================================================================== */ // trtri template <> hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* A, int lda, float* invA, int ldinvA) { return hipblasStrtri(handle, uplo, diag, n, A, lda, invA, ldinvA); } template <> hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* A, int lda, double* invA, int ldinvA) { return hipblasDtrtri(handle, uplo, diag, n, A, lda, invA, ldinvA); } template <> hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* A, int lda, hipblasComplex* invA, int ldinvA) { return hipblasCtrtri(handle, uplo, diag, n, A, lda, invA, ldinvA); } template <> hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* invA, int ldinvA) { return hipblasZtrtri(handle, uplo, diag, n, A, lda, invA, ldinvA); } // trtri_batched template <> hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* A[], int lda, float* invA[], int ldinvA, int batch_count) { return hipblasStrtriBatched(handle, uplo, diag, n, A, lda, invA, ldinvA, batch_count); } template <> hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* A[], int lda, double* invA[], int ldinvA, int batch_count) { return hipblasDtrtriBatched(handle, uplo, diag, n, A, lda, invA, ldinvA, batch_count); } template <> hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* A[], int lda, hipblasComplex* invA[], int ldinvA, int batch_count) { return hipblasCtrtriBatched(handle, uplo, diag, n, A, lda, invA, ldinvA, batch_count); } template <> hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* A[], int lda, hipblasDoubleComplex* invA[], int ldinvA, int batch_count) { return hipblasZtrtriBatched(handle, uplo, diag, n, A, lda, invA, ldinvA, batch_count); } // trtri_strided_batched template <> hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* A, int lda, hipblasStride stride_A, float* invA, int ldinvA, hipblasStride stride_invA, int batch_count) { return hipblasStrtriStridedBatched( handle, uplo, diag, n, A, lda, stride_A, invA, ldinvA, stride_invA, batch_count); } template <> hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* A, int lda, hipblasStride stride_A, double* invA, int ldinvA, hipblasStride stride_invA, int batch_count) { return 
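// Editorial note on the *StridedBatched wrappers in this file: hipblasStride
// arguments count elements between consecutive problems in the batch, not bytes.
// For a column-major n x n matrix with leading dimension lda, a non-overlapping
// layout therefore typically uses strideA >= lda * n; for example, with lda = 128,
// n = 100 and batchCount = 10, strideA = 128 * 100 = 12800 elements lays the ten
// matrices back to back in a single allocation of 128000 elements.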
hipblasDtrtriStridedBatched( handle, uplo, diag, n, A, lda, stride_A, invA, ldinvA, stride_invA, batch_count); } template <> hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* A, int lda, hipblasStride stride_A, hipblasComplex* invA, int ldinvA, hipblasStride stride_invA, int batch_count) { return hipblasCtrtriStridedBatched( handle, uplo, diag, n, A, lda, stride_A, invA, ldinvA, stride_invA, batch_count); } template <> hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* A, int lda, hipblasStride stride_A, hipblasDoubleComplex* invA, int ldinvA, hipblasStride stride_invA, int batch_count) { return hipblasZtrtriStridedBatched( handle, uplo, diag, n, A, lda, stride_A, invA, ldinvA, stride_invA, batch_count); } // dgmm template <> hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc) { return hipblasSdgmm(handle, side, m, n, A, lda, x, incx, C, ldc); } template <> hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* A, int lda, const double* x, int incx, double* C, int ldc) { return hipblasDdgmm(handle, side, m, n, A, lda, x, incx, C, ldc); } template <> hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, hipblasComplex* C, int ldc) { return hipblasCdgmm(handle, side, m, n, A, lda, x, incx, C, ldc); } template <> hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* C, int ldc) { return hipblasZdgmm(handle, side, m, n, A, lda, x, incx, C, ldc); } // dgmm_batched template <> hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* const A[], int lda, const float* const x[], int incx, float* const C[], int ldc, int batch_count) { return hipblasSdgmmBatched(handle, side, m, n, A, lda, x, incx, C, ldc, batch_count); } template <> hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* const A[], int lda, const double* const x[], int incx, double* const C[], int ldc, int batch_count) { return hipblasDdgmmBatched(handle, side, m, n, A, lda, x, incx, C, ldc, batch_count); } template <> hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, hipblasComplex* const C[], int ldc, int batch_count) { return hipblasCdgmmBatched(handle, side, m, n, A, lda, x, incx, C, ldc, batch_count); } template <> hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const C[], int ldc, int batch_count) { return hipblasZdgmmBatched(handle, side, m, n, A, lda, x, incx, C, ldc, batch_count); } // dgmm_strided_batched template <> hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* A, int lda, hipblasStride stride_A, const float* x, int incx, hipblasStride stride_x, float* C, int ldc, hipblasStride 
stride_C, int batch_count) { return hipblasSdgmmStridedBatched( handle, side, m, n, A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count); } template <> hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* A, int lda, hipblasStride stride_A, const double* x, int incx, hipblasStride stride_x, double* C, int ldc, hipblasStride stride_C, int batch_count) { return hipblasDdgmmStridedBatched( handle, side, m, n, A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count); } template <> hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* A, int lda, hipblasStride stride_A, const hipblasComplex* x, int incx, hipblasStride stride_x, hipblasComplex* C, int ldc, hipblasStride stride_C, int batch_count) { return hipblasCdgmmStridedBatched( handle, side, m, n, A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count); } template <> hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* A, int lda, hipblasStride stride_A, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, hipblasDoubleComplex* C, int ldc, hipblasStride stride_C, int batch_count) { return hipblasZdgmmStridedBatched( handle, side, m, n, A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count); } // gemm template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* A, int lda, const hipblasHalf* B, int ldb, const hipblasHalf* beta, hipblasHalf* C, int ldc) { return hipblasHgemm(handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return hipblasSgemm(handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return hipblasDgemm(handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCgemm(handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZgemm(handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // gemm_batched template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* const A[], int lda, const 
hipblasHalf* const B[], int ldb, const hipblasHalf* beta, hipblasHalf* const C[], int ldc, int batch_count) { return hipblasHgemmBatched( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batch_count) { return hipblasSgemmBatched( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batch_count) { return hipblasDgemmBatched( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batch_count) { return hipblasCgemmBatched( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batch_count) { return hipblasZgemmBatched( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } // gemm_strided_batched template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* A, int lda, int bsa, const hipblasHalf* B, int ldb, int bsb, const hipblasHalf* beta, hipblasHalf* C, int ldc, int bsc, int batch_count) { return hipblasHgemmStridedBatched(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* A, int lda, int bsa, const float* B, int ldb, int bsb, const float* beta, float* C, int ldc, int bsc, int batch_count) { return hipblasSgemmStridedBatched(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* A, int lda, int bsa, const double* B, int ldb, int bsb, const double* beta, double* C, int ldc, int bsc, int batch_count) { return hipblasDgemmStridedBatched(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* 
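// Example (editorial sketch) for the gemm wrappers above: with
// transA = transB = HIPBLAS_OP_N, gemm computes C = alpha*A*B + beta*C, where A is
// m x k (lda >= m), B is k x n (ldb >= k) and C is m x n (ldc >= m), all stored
// column major. Assuming host pointer mode and hypothetical device buffers dA, dB,
// dC, a float call could look like (the explicit <float> is only illustrative):
//
//     float alpha = 1.0f, beta = 0.0f;
//     hipblasGemm<float>(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, m, n, k,
//                        &alpha, dA, lda, dB, ldb, &beta, dC, ldc);
//
// The strided-batched form adds per-problem strides (bsa/bsb/bsc above) and a
// batch_count, advancing each pointer by its stride between consecutive problems.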
alpha, const hipblasComplex* A, int lda, int bsa, const hipblasComplex* B, int ldb, int bsb, const hipblasComplex* beta, hipblasComplex* C, int ldc, int bsc, int batch_count) { return hipblasCgemmStridedBatched(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, int bsa, const hipblasDoubleComplex* B, int ldb, int bsb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, int bsc, int batch_count) { return hipblasZgemmStridedBatched(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } // herk template <> hipblasStatus_t hipblasHerk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* A, int lda, const float* beta, hipblasComplex* C, int ldc) { return hipblasCherk(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } template <> hipblasStatus_t hipblasHerk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* A, int lda, const double* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZherk(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } // herk_batched template <> hipblasStatus_t hipblasHerkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* const A[], int lda, const float* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCherkBatched(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasHerkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* const A[], int lda, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZherkBatched(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } // herk_strided_batched template <> hipblasStatus_t hipblasHerkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCherkStridedBatched( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasHerkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZherkStridedBatched( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } // her2k template <> hipblasStatus_t hipblasHer2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const float* beta, hipblasComplex* C, int ldc) { return hipblasCher2k(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t 
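// Note (editorial): as the signatures in this block show, herk takes real-valued
// alpha and beta (float for Cherk, double for Zherk), since C = alpha*A*A**H + beta*C
// only stays Hermitian when both scalars are real, while her2k takes a complex
// alpha and a real beta. The templated wrappers keep these mixed scalar types
// instead of templating everything on a single element type.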
hipblasHer2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const double* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZher2k(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // her2k_batched template <> hipblasStatus_t hipblasHer2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const float* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCher2kBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasHer2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZher2kBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // her2k_strided_batched template <> hipblasStatus_t hipblasHer2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCher2kStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasHer2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZher2kStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // herkx template <> hipblasStatus_t hipblasHerkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const float* beta, hipblasComplex* C, int ldc) { return hipblasCherkx(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasHerkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const double* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZherkx(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // herkx_batched template <> hipblasStatus_t hipblasHerkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const float* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCherkxBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, 
batchCount); } template <> hipblasStatus_t hipblasHerkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZherkxBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // herkx_strided_batched template <> hipblasStatus_t hipblasHerkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCherkxStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasHerkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZherkxStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // symm template <> hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return hipblasSsymm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return hipblasDsymm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsymm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsymm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } // symm_batched template <> hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount) { return hipblasSsymmBatched( handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* 
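// Note (editorial): for the symm wrappers above, the hipblasSideMode_t argument
// selects which side the symmetric matrix A multiplies from:
//   HIPBLAS_SIDE_LEFT  : C = alpha*A*B + beta*C   (A is m x m)
//   HIPBLAS_SIDE_RIGHT : C = alpha*B*A + beta*C   (A is n x n)
// Only the triangle of A selected by uplo is referenced.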
const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount) { return hipblasDsymmBatched( handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsymmBatched( handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsymmBatched( handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // symm_strided_batched template <> hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSsymmStridedBatched(handle, side, uplo, m, n, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDsymmStridedBatched(handle, side, uplo, m, n, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsymmStridedBatched(handle, side, uplo, m, n, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsymmStridedBatched(handle, side, uplo, m, n, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // syrk template <> hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc) { return hipblasSsyrk(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t 
uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* beta, double* C, int ldc) { return hipblasDsyrk(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyrk(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyrk(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } // syrk_batched template <> hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* beta, float* const C[], int ldc, int batchCount) { return hipblasSsyrkBatched(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* beta, double* const C[], int ldc, int batchCount) { return hipblasDsyrkBatched(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyrkBatched(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyrkBatched(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } // syrk_strided_batched template <> hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSsyrkStridedBatched( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDsyrkStridedBatched( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* beta, hipblasComplex* 
C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyrkStridedBatched( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyrkStridedBatched( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } // syr2k template <> hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return hipblasSsyr2k(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return hipblasDsyr2k(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyr2k(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyr2k(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // syr2k_batched template <> hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount) { return hipblasSsyr2kBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount) { return hipblasDsyr2kBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyr2kBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const 
hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyr2kBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // syr2k_strided_batched template <> hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSsyr2kStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDsyr2kStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyr2kStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyr2kStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // syrkx template <> hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return hipblasSsyrkx(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return hipblasDsyrkx(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyrkx(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const 
hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyrkx(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // syrkx_batched template <> hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount) { return hipblasSsyrkxBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount) { return hipblasDsyrkxBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyrkxBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyrkxBatched( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // syrkx_strided_batched template <> hipblasStatus_t hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSsyrkxStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDsyrkxStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyrkxStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t 
hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyrkxStridedBatched(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // hemm template <> hipblasStatus_t hipblasHemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasChemm(handle, side, uplo, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasHemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZhemm(handle, side, uplo, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // hemm_batched template <> hipblasStatus_t hipblasHemmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasChemmBatched( handle, side, uplo, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasHemmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZhemmBatched( handle, side, uplo, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // hemm_strided_batched template <> hipblasStatus_t hipblasHemmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasChemmStridedBatched(handle, side, uplo, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasHemmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZhemmStridedBatched(handle, side, uplo, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // trmm template <> hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, float* B, int ldb) 
{ return hipblasStrmm(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, double* B, int ldb) { return hipblasDtrmm(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasComplex* B, int ldb) { return hipblasCtrmm(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb) { return hipblasZtrmm(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } // trmm_batched template <> hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount) { return hipblasStrmmBatched( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batchCount); } template <> hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount) { return hipblasDtrmmBatched( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batchCount); } template <> hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, hipblasComplex* const B[], int ldb, int batchCount) { return hipblasCtrmmBatched( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batchCount); } template <> hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const B[], int ldb, int batchCount) { return hipblasZtrmmBatched( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batchCount); } // trmm_strided_batched template <> hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, float* B, int ldb, hipblasStride strideB, int batchCount) { return hipblasStrmmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batchCount); } template <> hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, double* B, int ldb, 
hipblasStride strideB, int batchCount) { return hipblasDtrmmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batchCount); } template <> hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* B, int ldb, hipblasStride strideB, int batchCount) { return hipblasCtrmmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batchCount); } template <> hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* B, int ldb, hipblasStride strideB, int batchCount) { return hipblasZtrmmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batchCount); } // tbmv template <> hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, float* x, int incx) { return hipblasStbmv(handle, uplo, transA, diag, m, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, double* x, int incx) { return hipblasDtbmv(handle, uplo, transA, diag, m, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtbmv(handle, uplo, transA, diag, m, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtbmv(handle, uplo, transA, diag, m, k, A, lda, x, incx); } // tbmv_batched template <> hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* const A[], int lda, float* const x[], int incx, int batch_count) { return hipblasStbmvBatched(handle, uplo, transA, diag, m, k, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* const A[], int lda, double* const x[], int incx, int batch_count) { return hipblasDtbmvBatched(handle, uplo, transA, diag, m, k, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCtbmvBatched(handle, uplo, transA, diag, m, k, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* const A[], int lda, 
hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZtbmvBatched(handle, uplo, transA, diag, m, k, A, lda, x, incx, batch_count); } // tbmv_strided_batched template <> hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, hipblasStride stride_a, float* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasStbmvStridedBatched( handle, uplo, transA, diag, m, k, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, hipblasStride stride_a, double* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasDtbmvStridedBatched( handle, uplo, transA, diag, m, k, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasStride stride_a, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasCtbmvStridedBatched( handle, uplo, transA, diag, m, k, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasZtbmvStridedBatched( handle, uplo, transA, diag, m, k, A, lda, stride_a, x, incx, stride_x, batch_count); } // tbsv template <> hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float* A, int lda, float* x, int incx) { return hipblasStbsv(handle, uplo, transA, diag, n, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double* A, int lda, double* x, int incx) { return hipblasDtbsv(handle, uplo, transA, diag, n, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtbsv(handle, uplo, transA, diag, n, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtbsv(handle, uplo, transA, diag, n, k, A, lda, x, incx); } // tbsv_batched template <> hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float* const A[], int lda, float* const x[], int incx, int batchCount) { return hipblasStbsvBatched(handle, uplo, transA, diag, n, k, A, lda, x, incx, batchCount); } template <> hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double* const A[], int lda, double* const x[], int 
incx, int batchCount) { return hipblasDtbsvBatched(handle, uplo, transA, diag, n, k, A, lda, x, incx, batchCount); } template <> hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtbsvBatched(handle, uplo, transA, diag, n, k, A, lda, x, incx, batchCount); } template <> hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batchCount) { return hipblasZtbsvBatched(handle, uplo, transA, diag, n, k, A, lda, x, incx, batchCount); } // tbsv_strided_batched template <> hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float* A, int lda, hipblasStride strideA, float* x, int incx, hipblasStride stridex, int batchCount) { return hipblasStbsvStridedBatched( handle, uplo, transA, diag, n, k, A, lda, strideA, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double* A, int lda, hipblasStride strideA, double* x, int incx, hipblasStride stridex, int batchCount) { return hipblasDtbsvStridedBatched( handle, uplo, transA, diag, n, k, A, lda, strideA, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasCtbsvStridedBatched( handle, uplo, transA, diag, n, k, A, lda, strideA, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasZtbsvStridedBatched( handle, uplo, transA, diag, n, k, A, lda, strideA, x, incx, stridex, batchCount); } // tpmv template <> hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, float* x, int incx) { return hipblasStpmv(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, double* x, int incx) { return hipblasDtpmv(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasComplex* x, int incx) { return hipblasCtpmv(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx) { return hipblasZtpmv(handle, uplo, transA, diag, m, AP, x, incx); } // 
tpmv_batched template <> hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const AP[], float* const x[], int incx, int batchCount) { return hipblasStpmvBatched(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const AP[], double* const x[], int incx, int batchCount) { return hipblasDtpmvBatched(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const AP[], hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtpmvBatched(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const AP[], hipblasDoubleComplex* const x[], int incx, int batchCount) { return hipblasZtpmvBatched(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } // tpmv_strided_batched template <> hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, hipblasStride strideAP, float* x, int incx, hipblasStride stridex, int batchCount) { return hipblasStpmvStridedBatched( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, hipblasStride strideAP, double* x, int incx, hipblasStride stridex, int batchCount) { return hipblasDtpmvStridedBatched( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasStride strideAP, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasCtpmvStridedBatched( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasStride strideAP, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasZtpmvStridedBatched( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } // tpsv template <> hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, float* x, int incx) { return hipblasStpsv(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, double* x, int incx) { return hipblasDtpsv(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const 
hipblasComplex* AP, hipblasComplex* x, int incx) { return hipblasCtpsv(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx) { return hipblasZtpsv(handle, uplo, transA, diag, m, AP, x, incx); } // tpsv_batched template <> hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const AP[], float* const x[], int incx, int batchCount) { return hipblasStpsvBatched(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const AP[], double* const x[], int incx, int batchCount) { return hipblasDtpsvBatched(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const AP[], hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtpsvBatched(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const AP[], hipblasDoubleComplex* const x[], int incx, int batchCount) { return hipblasZtpsvBatched(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } // tpsv_strided_batched template <> hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, hipblasStride strideAP, float* x, int incx, hipblasStride stridex, int batchCount) { return hipblasStpsvStridedBatched( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, hipblasStride strideAP, double* x, int incx, hipblasStride stridex, int batchCount) { return hipblasDtpsvStridedBatched( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasStride strideAP, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasCtpsvStridedBatched( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasStride strideAP, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasZtpsvStridedBatched( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } // trmv template <> hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, float* x, int incx) { return hipblasStrmv(handle, uplo, transA, diag, m, A, lda, x, incx); } 
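// ---------------------------------------------------------------------------
// Usage sketch (illustrative only): these type-dispatching specializations let
// templated client code call a single wrapper such as hipblasTrmv and resolve
// to the matching typed entry point (hipblasStrmv/Dtrmv/Ctrmv/Ztrmv) at
// compile time.  The helper below is hypothetical and assumes the usual
// CHECK_HIPBLAS_ERROR status-checking macro from the client utilities.
//
//     template <typename T>
//     void unit_lower_trmv(hipblasHandle_t handle, int m, const T* dA, int lda, T* dx)
//     {
//         // Resolves to the S/D/C/Z specialization that matches T.
//         CHECK_HIPBLAS_ERROR(hipblasTrmv(handle,
//                                         HIPBLAS_FILL_MODE_LOWER,
//                                         HIPBLAS_OP_N,
//                                         HIPBLAS_DIAG_UNIT,
//                                         m, dA, lda, dx, 1));
//     }
// ---------------------------------------------------------------------------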
template <> hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, double* x, int incx) { return hipblasDtrmv(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtrmv(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtrmv(handle, uplo, transA, diag, m, A, lda, x, incx); } // trmv_batched template <> hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const A[], int lda, float* const x[], int incx, int batch_count) { return hipblasStrmvBatched(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const A[], int lda, double* const x[], int incx, int batch_count) { return hipblasDtrmvBatched(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCtrmvBatched(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZtrmvBatched(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } // trmv_strided_batched template <> hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, hipblasStride stride_a, float* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasStrmvStridedBatched( handle, uplo, transA, diag, m, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, hipblasStride stride_a, double* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasDtrmvStridedBatched( handle, uplo, transA, diag, m, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasStride stride_a, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasCtrmvStridedBatched( handle, uplo, transA, diag, m, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, 
hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasZtrmvStridedBatched( handle, uplo, transA, diag, m, A, lda, stride_a, x, incx, stride_x, batch_count); } // trsm template <> hipblasStatus_t hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* A, int lda, float* B, int ldb) { return hipblasStrsm(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* A, int lda, double* B, int ldb) { return hipblasDtrsm(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb) { return hipblasCtrsm(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb) { return hipblasZtrsm(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } // trsm_batched template <> hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const A[], int lda, float* B[], int ldb, int batch_count) { return hipblasStrsmBatched( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batch_count); } template <> hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const A[], int lda, double* B[], int ldb, int batch_count) { return hipblasDtrsmBatched( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batch_count); } template <> hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const A[], int lda, hipblasComplex* B[], int ldb, int batch_count) { return hipblasCtrsmBatched( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batch_count); } template <> hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* B[], int ldb, int batch_count) { return hipblasZtrsmBatched( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batch_count); } // trsm_strided_batched template <> hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* A, int lda, hipblasStride strideA, float* 
B, int ldb, hipblasStride strideB, int batch_count) { return hipblasStrsmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batch_count); } template <> hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* A, int lda, hipblasStride strideA, double* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasDtrsmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batch_count); } template <> hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasCtrsmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batch_count); } template <> hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasZtrsmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batch_count); } // geam template <> hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb, float* C, int ldc) { return hipblasSgeam(handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } template <> hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc) { return hipblasDgeam(handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } template <> hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* beta, const hipblasComplex* B, int ldb, hipblasComplex* C, int ldc) { return hipblasCgeam(handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } template <> hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* C, int ldc) { return hipblasZgeam(handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } // geam_batched template <> hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* const A[], int lda, const float* beta, const float* const B[], int ldb, float* const C[], int ldc, int batchCount) { return hipblasSgeamBatched( handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc, batchCount); } template <> hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, 
hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double* alpha, const double* const A[], int lda, const double* beta, const double* const B[], int ldb, double* const C[], int ldc, int batchCount) { return hipblasDgeamBatched( handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc, batchCount); } template <> hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* beta, const hipblasComplex* const B[], int ldb, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCgeamBatched( handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc, batchCount); } template <> hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* const B[], int ldb, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZgeamBatched( handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc, batchCount); } // geam_strided_batched template <> hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* beta, const float* B, int ldb, hipblasStride strideB, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSgeamStridedBatched(handle, transA, transB, m, n, alpha, A, lda, strideA, beta, B, ldb, strideB, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* beta, const double* B, int ldb, hipblasStride strideB, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDgeamStridedBatched(handle, transA, transB, m, n, alpha, A, lda, strideA, beta, B, ldb, strideB, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* beta, const hipblasComplex* B, int ldb, hipblasStride strideB, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCgeamStridedBatched(handle, transA, transB, m, n, alpha, A, lda, strideA, beta, B, ldb, strideB, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZgeamStridedBatched(handle, transA, transB, m, n, alpha, A, lda, strideA, beta, B, ldb, strideB, C, ldc, strideC, batchCount); } #ifdef __HIP_PLATFORM_SOLVER__ // getrf template <> hipblasStatus_t hipblasGetrf( hipblasHandle_t handle, const int n, float* A, const int lda, int* ipiv, int* info) { return hipblasSgetrf(handle, n, A, lda, ipiv, 
info); } template <> hipblasStatus_t hipblasGetrf( hipblasHandle_t handle, const int n, double* A, const int lda, int* ipiv, int* info) { return hipblasDgetrf(handle, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGetrf( hipblasHandle_t handle, const int n, hipblasComplex* A, const int lda, int* ipiv, int* info) { return hipblasCgetrf(handle, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGetrf(hipblasHandle_t handle, const int n, hipblasDoubleComplex* A, const int lda, int* ipiv, int* info) { return hipblasZgetrf(handle, n, A, lda, ipiv, info); } // getrf_batched template <> hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, int* info, const int batchCount) { return hipblasSgetrfBatched(handle, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, int* info, const int batchCount) { return hipblasDgetrfBatched(handle, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount) { return hipblasCgetrfBatched(handle, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount) { return hipblasZgetrfBatched(handle, n, A, lda, ipiv, info, batchCount); } // getrf_strided_batched template <> hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, float* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasSgetrfStridedBatched(handle, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, double* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasDgetrfStridedBatched(handle, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, hipblasComplex* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasCgetrfStridedBatched(handle, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasZgetrfStridedBatched(handle, n, A, lda, strideA, ipiv, strideP, info, batchCount); } // getrs template <> hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* A, const int lda, const int* ipiv, float* B, const int ldb, int* info) { return hipblasSgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info); } template <> hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* A, const int lda, const int* ipiv, double* B, const int ldb, int* info) { return hipblasDgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info); } template <> hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const 
hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* A, const int lda, const int* ipiv, hipblasComplex* B, const int ldb, int* info) { return hipblasCgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info); } template <> hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const int* ipiv, hipblasDoubleComplex* B, const int ldb, int* info) { return hipblasZgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info); } // getrs_batched template <> hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount) { return hipblasSgetrsBatched(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info, batchCount); } template <> hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount) { return hipblasDgetrsBatched(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info, batchCount); } template <> hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount) { return hipblasCgetrsBatched(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info, batchCount); } template <> hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount) { return hipblasZgetrsBatched(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info, batchCount); } // getrs_strided_batched template <> hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, float* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasSgetrsStridedBatched( handle, trans, n, nrhs, A, lda, strideA, ipiv, strideP, B, ldb, strideB, info, batchCount); } template <> hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, double* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasDgetrsStridedBatched( handle, trans, n, nrhs, A, lda, strideA, ipiv, strideP, B, ldb, strideB, info, batchCount); } template <> hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, hipblasComplex* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasCgetrsStridedBatched( handle, trans, n, nrhs, A, lda, strideA, ipiv, strideP, B, ldb, strideB, info, batchCount); } template <> hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, 
hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, hipblasDoubleComplex* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasZgetrsStridedBatched( handle, trans, n, nrhs, A, lda, strideA, ipiv, strideP, B, ldb, strideB, info, batchCount); } // getri_batched template <> hipblasStatus_t hipblasGetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount) { return hipblasSgetriBatched(handle, n, A, lda, ipiv, C, ldc, info, batchCount); } template <> hipblasStatus_t hipblasGetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount) { return hipblasDgetriBatched(handle, n, A, lda, ipiv, C, ldc, info, batchCount); } template <> hipblasStatus_t hipblasGetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount) { return hipblasCgetriBatched(handle, n, A, lda, ipiv, C, ldc, info, batchCount); } template <> hipblasStatus_t hipblasGetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount) { return hipblasZgetriBatched(handle, n, A, lda, ipiv, C, ldc, info, batchCount); } // geqrf template <> hipblasStatus_t hipblasGeqrf(hipblasHandle_t handle, const int m, const int n, float* A, const int lda, float* ipiv, int* info) { return hipblasSgeqrf(handle, m, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGeqrf(hipblasHandle_t handle, const int m, const int n, double* A, const int lda, double* ipiv, int* info) { return hipblasDgeqrf(handle, m, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGeqrf(hipblasHandle_t handle, const int m, const int n, hipblasComplex* A, const int lda, hipblasComplex* ipiv, int* info) { return hipblasCgeqrf(handle, m, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGeqrf(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* A, const int lda, hipblasDoubleComplex* ipiv, int* info) { return hipblasZgeqrf(handle, m, n, A, lda, ipiv, info); } // geqrf_batched template <> hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount) { return hipblasSgeqrfBatched(handle, m, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, double* const A[], const int lda, double* const ipiv[], int* info, const int batchCount) { return hipblasDgeqrfBatched(handle, m, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasComplex* const A[], const int lda, hipblasComplex* const ipiv[], int* info, const int batchCount) { return hipblasCgeqrfBatched(handle, m, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const ipiv[], int* info, const int batchCount) { return hipblasZgeqrfBatched(handle, m, n, A, lda, ipiv, info, batchCount); } // geqrf_strided_batched template <> 
hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, float* A, const int lda, const hipblasStride strideA, float* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasSgeqrfStridedBatched( handle, m, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, double* A, const int lda, const hipblasStride strideA, double* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasDgeqrfStridedBatched( handle, m, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, hipblasComplex* A, const int lda, const hipblasStride strideA, hipblasComplex* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasCgeqrfStridedBatched( handle, m, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, hipblasDoubleComplex* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasZgeqrfStridedBatched( handle, m, n, A, lda, strideA, ipiv, strideP, info, batchCount); } // gels template <> hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* A, const int lda, float* B, const int ldb, int* info, int* deviceInfo) { return hipblasSgels(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo); } template <> hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* A, const int lda, double* B, const int ldb, int* info, int* deviceInfo) { return hipblasDgels(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo); } template <> hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* A, const int lda, hipblasComplex* B, const int ldb, int* info, int* deviceInfo) { return hipblasCgels(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo); } template <> hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, hipblasDoubleComplex* B, const int ldb, int* info, int* deviceInfo) { return hipblasZgels(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo); } // gelsBatched template <> hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* const A[], const int lda, float* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasSgelsBatched( handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* const A[], const int lda, double* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasDgelsBatched( handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* const A[], const int 
lda, hipblasComplex* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasCgelsBatched( handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasZgelsBatched( handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount); } // gelsStridedBatched template <> hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* A, const int lda, const hipblasStride strideA, float* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasSgelsStridedBatched( handle, trans, m, n, nrhs, A, lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* A, const int lda, const hipblasStride strideA, double* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasDgelsStridedBatched( handle, trans, m, n, nrhs, A, lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* A, const int lda, const hipblasStride strideA, hipblasComplex* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasCgelsStridedBatched( handle, trans, m, n, nrhs, A, lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, hipblasDoubleComplex* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasZgelsStridedBatched( handle, trans, m, n, nrhs, A, lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount); } #endif ///////////// // FORTRAN // ///////////// // axpy template <> hipblasStatus_t hipblasAxpy(hipblasHandle_t handle, int n, const hipblasHalf* alpha, const hipblasHalf* x, int incx, hipblasHalf* y, int incy) { return hipblasHaxpyFortran(handle, n, alpha, x, incx, y, incy); } template <> hipblasStatus_t hipblasAxpy( hipblasHandle_t handle, int n, const float* alpha, const float* x, int incx, float* y, int incy) { return hipblasSaxpyFortran(handle, n, alpha, x, incx, y, incy); } template <> hipblasStatus_t hipblasAxpy(hipblasHandle_t handle, int n, const double* alpha, const double* x, int incx, double* y, int incy) { return hipblasDaxpyFortran(handle, n, alpha, x, incx, y, incy); } template <> hipblasStatus_t hipblasAxpy(hipblasHandle_t handle, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* y, int incy) { return hipblasCaxpyFortran(handle, n, alpha, x, incx, y, incy); } template <> hipblasStatus_t hipblasAxpy(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { return hipblasZaxpyFortran(handle, n, 
alpha, x, incx, y, incy); } // axpy_batched template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const hipblasHalf* alpha, const hipblasHalf* const x[], int incx, hipblasHalf* const y[], int incy, int batch_count) { return hipblasHaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const float* alpha, const float* const x[], int incx, float* const y[], int incy, int batch_count) { return hipblasSaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const double* alpha, const double* const x[], int incx, double* const y[], int incy, int batch_count) { return hipblasDaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, int batch_count) { return hipblasCaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count); } // axpy_strided_batched template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const hipblasHalf* alpha, const hipblasHalf* x, int incx, hipblasStride stridex, hipblasHalf* y, int incy, hipblasStride stridey, int batch_count) { return hipblasHaxpyStridedBatchedFortran( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, int batch_count) { return hipblasSaxpyStridedBatchedFortran( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, int batch_count) { return hipblasDaxpyStridedBatchedFortran( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasCaxpyStridedBatchedFortran( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasZaxpyStridedBatchedFortran( handle, n, alpha, x, incx, stridex, y, incy, stridey, batch_count); } // scal template <> hipblasStatus_t hipblasScal( hipblasHandle_t handle, int n, const float* alpha, float* x, int incx) { return hipblasSscalFortran(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal( hipblasHandle_t handle, int n, const double* alpha, double* x, int incx) { return hipblasDscalFortran(handle, n, 
alpha, x, incx); } template <> hipblasStatus_t hipblasScal( hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx) { return hipblasCscalFortran(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal( hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx) { return hipblasCsscalFortran(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx) { return hipblasZscalFortran(handle, n, alpha, x, incx); } template <> hipblasStatus_t hipblasScal( hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx) { return hipblasZdscalFortran(handle, n, alpha, x, incx); } // scal_batched template <> hipblasStatus_t hipblasScalBatched( hipblasHandle_t handle, int n, const float* alpha, float* const x[], int incx, int batch_count) { return hipblasSscalBatchedFortran(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const double* alpha, double* const x[], int incx, int batch_count) { return hipblasDscalBatchedFortran(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCscalBatchedFortran(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched( hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZscalBatchedFortran(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCsscalBatchedFortran(handle, n, alpha, x, incx, batch_count); } template <> hipblasStatus_t hipblasScalBatched(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZdscalBatchedFortran(handle, n, alpha, x, incx, batch_count); } // scal_strided_batched template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx, hipblasStride stridex, int batch_count) { return hipblasSscalStridedBatchedFortran(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const double* alpha, double* x, int incx, hipblasStride stridex, int batch_count) { return hipblasDscalStridedBatchedFortran(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasCscalStridedBatchedFortran(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched( hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasZscalStridedBatchedFortran(handle, n, alpha, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasCsscalStridedBatchedFortran(handle, n, alpha, 
x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasZdscalStridedBatchedFortran(handle, n, alpha, x, incx, stridex, batch_count); } //swap template <> hipblasStatus_t hipblasSwap(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy) { return hipblasSswapFortran(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasSwap( hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy) { return hipblasDswapFortran(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasSwap( hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy) { return hipblasCswapFortran(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasSwap(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { return hipblasZswapFortran(handle, n, x, incx, y, incy); } // swap_batched template <> hipblasStatus_t hipblasSwapBatched( hipblasHandle_t handle, int n, float* x[], int incx, float* y[], int incy, int batch_count) { return hipblasSswapBatchedFortran(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasSwapBatched( hipblasHandle_t handle, int n, double* x[], int incx, double* y[], int incy, int batch_count) { return hipblasDswapBatchedFortran(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasSwapBatched(hipblasHandle_t handle, int n, hipblasComplex* x[], int incx, hipblasComplex* y[], int incy, int batch_count) { return hipblasCswapBatchedFortran(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasSwapBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* x[], int incx, hipblasDoubleComplex* y[], int incy, int batch_count) { return hipblasZswapBatchedFortran(handle, n, x, incx, y, incy, batch_count); } // swap_strided_batched template <> hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, int batch_count) { return hipblasSswapStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, int batch_count) { return hipblasDswapStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasCswapStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasZswapStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count); } // copy template <> hipblasStatus_t hipblasCopy( hipblasHandle_t handle, int n, const float* x, int incx, float* y, int incy) { return hipblasScopyFortran(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasCopy( hipblasHandle_t handle, int n, const double* x, int incx, double* 
y, int incy) { return hipblasDcopyFortran(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasCopy( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasComplex* y, int incy) { return hipblasCcopyFortran(handle, n, x, incx, y, incy); } template <> hipblasStatus_t hipblasCopy(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy) { return hipblasZcopyFortran(handle, n, x, incx, y, incy); } // copy_batched template <> hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const float* const x[], int incx, float* const y[], int incy, int batch_count) { return hipblasScopyBatchedFortran(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const double* const x[], int incx, double* const y[], int incy, int batch_count) { return hipblasDcopyBatchedFortran(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, int batch_count) { return hipblasCcopyBatchedFortran(handle, n, x, incx, y, incy, batch_count); } template <> hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZcopyBatchedFortran(handle, n, x, incx, y, incy, batch_count); } // copy_strided_batched template <> hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, int batch_count) { return hipblasScopyStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, int batch_count) { return hipblasDcopyStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasCcopyStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasZcopyStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count); } // dot template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const hipblasHalf* x, int incx, const hipblasHalf* y, int incy, hipblasHalf* result) { return hipblasHdotFortran(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const hipblasBfloat16* x, int incx, const hipblasBfloat16* y, int incy, hipblasBfloat16* result) { return hipblasBfdotFortran(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result) { return hipblasSdotFortran(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const double* x, int incx, const 
double* y, int incy, double* result) { return hipblasDdotFortran(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result) { return hipblasCdotuFortran(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDot(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result) { return hipblasZdotuFortran(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDotc(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result) { return hipblasCdotcFortran(handle, n, x, incx, y, incy, result); } template <> hipblasStatus_t hipblasDotc(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result) { return hipblasZdotcFortran(handle, n, x, incx, y, incy, result); } // dot_batched template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const hipblasHalf* const x[], int incx, const hipblasHalf* const y[], int incy, int batch_count, hipblasHalf* result) { return hipblasHdotBatchedFortran(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const hipblasBfloat16* const x[], int incx, const hipblasBfloat16* const y[], int incy, int batch_count, hipblasBfloat16* result) { return hipblasBfdotBatchedFortran(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const float* const x[], int incx, const float* const y[], int incy, int batch_count, float* result) { return hipblasSdotBatchedFortran(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const double* const x[], int incx, const double* const y[], int incy, int batch_count, double* result) { return hipblasDdotBatchedFortran(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, int batch_count, hipblasComplex* result) { return hipblasCdotuBatchedFortran(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotcBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, int batch_count, hipblasComplex* result) { return hipblasCdotcBatchedFortran(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, int batch_count, hipblasDoubleComplex* result) { return hipblasZdotuBatchedFortran(handle, n, x, incx, y, incy, batch_count, result); } template <> hipblasStatus_t hipblasDotcBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, int batch_count, hipblasDoubleComplex* result) { return hipblasZdotcBatchedFortran(handle, n, x, incx, y, incy, batch_count, result); } // dot_strided_batched template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const hipblasHalf* x, int incx, 
hipblasStride stridex, const hipblasHalf* y, int incy, hipblasStride stridey, int batch_count, hipblasHalf* result) { return hipblasHdotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const hipblasBfloat16* x, int incx, hipblasStride stridex, const hipblasBfloat16* y, int incy, hipblasStride stridey, int batch_count, hipblasBfloat16* result) { return hipblasBfdotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, int batch_count, float* result) { return hipblasSdotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, int batch_count, double* result) { return hipblasDdotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasComplex* result) { return hipblasCdotuStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotcStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasComplex* result) { return hipblasCdotcStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasDoubleComplex* result) { return hipblasZdotuStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } template <> hipblasStatus_t hipblasDotcStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasDoubleComplex* result) { return hipblasZdotcStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, batch_count, result); } // asum template <> hipblasStatus_t hipblasAsum( hipblasHandle_t handle, int n, const float* x, int incx, float* result) { return hipblasSasumFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasAsum( hipblasHandle_t handle, int n, const double* x, int incx, double* result) { return hipblasDasumFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasAsum( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result) { return hipblasScasumFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasAsum( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result) { return hipblasDzasumFortran(handle, n, x, incx, result); } // asum_batched template <> hipblasStatus_t hipblasAsumBatched( hipblasHandle_t handle, int n, const float* const 
x[], int incx, int batch_count, float* result) { return hipblasSasumBatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasAsumBatched(hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, double* result) { return hipblasDasumBatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasAsumBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, float* result) { return hipblasScasumBatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasAsumBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, double* result) { return hipblasDzasumBatchedFortran(handle, n, x, incx, batch_count, result); } // asum_strided_batched template <> hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, float* result) { return hipblasSasumStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, double* result) { return hipblasDasumStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, float* result) { return hipblasScasumStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, double* result) { return hipblasDzasumStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } // nrm2 template <> hipblasStatus_t hipblasNrm2( hipblasHandle_t handle, int n, const float* x, int incx, float* result) { return hipblasSnrm2Fortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasNrm2( hipblasHandle_t handle, int n, const double* x, int incx, double* result) { return hipblasDnrm2Fortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasNrm2( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result) { return hipblasScnrm2Fortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasNrm2( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result) { return hipblasDznrm2Fortran(handle, n, x, incx, result); } // nrm2_batched template <> hipblasStatus_t hipblasNrm2Batched( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, float* result) { return hipblasSnrm2BatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasNrm2Batched(hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, double* result) { return hipblasDnrm2BatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasNrm2Batched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, float* result) { return hipblasScnrm2BatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasNrm2Batched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, double* result) { return 
hipblasDznrm2BatchedFortran(handle, n, x, incx, batch_count, result); } // nrm2_strided_batched template <> hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, float* result) { return hipblasSnrm2StridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, double* result) { return hipblasDnrm2StridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, float* result) { return hipblasScnrm2StridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, double* result) { return hipblasDznrm2StridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } // rot template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* c, const float* s) { return hipblasSrotFortran(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* c, const double* s) { return hipblasDrotFortran(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, const float* c, const hipblasComplex* s) { return hipblasCrotFortran(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, const float* c, const float* s) { return hipblasCsrotFortran(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot( hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, const double* c, const hipblasDoubleComplex* s) { return hipblasZrotFortran(handle, n, x, incx, y, incy, c, s); } template <> hipblasStatus_t hipblasRot(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, const double* c, const double* s) { return hipblasZdrotFortran(handle, n, x, incx, y, incy, c, s); } // rot_batched template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, float* const x[], int incx, float* const y[], int incy, const float* c, const float* s, int batch_count) { return hipblasSrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, double* const x[], int incx, double* const y[], int incy, const double* c, const double* s, int batch_count) { return hipblasDrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, const float* c, const hipblasComplex* s, int batch_count) { return hipblasCrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, hipblasComplex* const x[], int incx, hipblasComplex* const y[], int 
incy, const float* c, const float* s, int batch_count) { return hipblasCsrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched( hipblasHandle_t handle, int n, hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, const double* c, const hipblasDoubleComplex* s, int batch_count) { return hipblasZrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count); } template <> hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, const double* c, const double* s, int batch_count) { return hipblasZdrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count); } // rot_strided_batched template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, const float* c, const float* s, int batch_count) { return hipblasSrotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, const double* c, const double* s, int batch_count) { return hipblasDrotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, const float* c, const hipblasComplex* s, int batch_count) { return hipblasCrotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, const float* c, const float* s, int batch_count) { return hipblasCsrotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched( hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, const double* c, const hipblasDoubleComplex* s, int batch_count) { return hipblasZrotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } template <> hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, const double* c, const double* s, int batch_count) { return hipblasZdrotStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, c, s, batch_count); } // rotg template <> hipblasStatus_t hipblasRotg(hipblasHandle_t handle, float* a, float* b, float* c, float* s) { return hipblasSrotgFortran(handle, a, b, c, s); } template <> hipblasStatus_t hipblasRotg( hipblasHandle_t handle, double* a, double* b, double* c, double* s) { return hipblasDrotgFortran(handle, a, b, c, s); } template <> hipblasStatus_t hipblasRotg( hipblasHandle_t handle, hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s) { return hipblasCrotgFortran(handle, a, b, c, s); } template <> hipblasStatus_t hipblasRotg(hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* 
s) { return hipblasZrotgFortran(handle, a, b, c, s); } // rotg_batched template <> hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, float* const a[], float* const b[], float* const c[], float* const s[], int batch_count) { return hipblasSrotgBatchedFortran(handle, a, b, c, s, batch_count); } template <> hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, double* const a[], double* const b[], double* const c[], double* const s[], int batch_count) { return hipblasDrotgBatchedFortran(handle, a, b, c, s, batch_count); } template <> hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, hipblasComplex* const a[], hipblasComplex* const b[], float* const c[], hipblasComplex* const s[], int batch_count) { return hipblasCrotgBatchedFortran(handle, a, b, c, s, batch_count); } template <> hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, hipblasDoubleComplex* const a[], hipblasDoubleComplex* const b[], double* const c[], hipblasDoubleComplex* const s[], int batch_count) { return hipblasZrotgBatchedFortran(handle, a, b, c, s, batch_count); } // rotg_strided_batched template <> hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, float* a, hipblasStride stridea, float* b, hipblasStride strideb, float* c, hipblasStride stridec, float* s, hipblasStride strides, int batch_count) { return hipblasSrotgStridedBatchedFortran( handle, a, stridea, b, strideb, c, stridec, s, strides, batch_count); } template <> hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, double* a, hipblasStride stridea, double* b, hipblasStride strideb, double* c, hipblasStride stridec, double* s, hipblasStride strides, int batch_count) { return hipblasDrotgStridedBatchedFortran( handle, a, stridea, b, strideb, c, stridec, s, strides, batch_count); } template <> hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, hipblasComplex* a, hipblasStride stridea, hipblasComplex* b, hipblasStride strideb, float* c, hipblasStride stridec, hipblasComplex* s, hipblasStride strides, int batch_count) { return hipblasCrotgStridedBatchedFortran( handle, a, stridea, b, strideb, c, stridec, s, strides, batch_count); } template <> hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasStride stridea, hipblasDoubleComplex* b, hipblasStride strideb, double* c, hipblasStride stridec, hipblasDoubleComplex* s, hipblasStride strides, int batch_count) { return hipblasZrotgStridedBatchedFortran( handle, a, stridea, b, strideb, c, stridec, s, strides, batch_count); } // rotm template <> hipblasStatus_t hipblasRotm( hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param) { return hipblasSrotmFortran(handle, n, x, incx, y, incy, param); } template <> hipblasStatus_t hipblasRotm( hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param) { return hipblasDrotmFortran(handle, n, x, incx, y, incy, param); } // rotm_batched template <> hipblasStatus_t hipblasRotmBatched(hipblasHandle_t handle, int n, float* const x[], int incx, float* const y[], int incy, const float* const param[], int batch_count) { return hipblasSrotmBatchedFortran(handle, n, x, incx, y, incy, param, batch_count); } template <> hipblasStatus_t hipblasRotmBatched(hipblasHandle_t handle, int n, double* const x[], int incx, double* const y[], int incy, const double* const param[], int batch_count) { return hipblasDrotmBatchedFortran(handle, n, x, incx, y, incy, param, batch_count); } // rotm_strided_batched template <> 
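// NOTE (added commentary, not part of the original source): as with the other
// wrappers in this FORTRAN section, the rotm_strided_batched specializations
// below route through the Fortran bindings (hipblasSrotmStridedBatchedFortran
// and hipblasDrotmStridedBatchedFortran) rather than the plain C API, presumably
// so a single templated test path can exercise either interface. Illustrative
// call, assuming dx, dy and dparam are device arrays sized for batch_count
// batches:
//
//     hipblasRotmStridedBatched<float>(handle, n, dx, incx, stridex,
//                                      dy, incy, stridey, dparam,
//                                      strideparam, batch_count);
//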
hipblasStatus_t hipblasRotmStridedBatched(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, const float* param, hipblasStride strideparam, int batch_count) { return hipblasSrotmStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, param, strideparam, batch_count); } template <> hipblasStatus_t hipblasRotmStridedBatched(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, const double* param, hipblasStride strideparam, int batch_count) { return hipblasDrotmStridedBatchedFortran( handle, n, x, incx, stridex, y, incy, stridey, param, strideparam, batch_count); } // rotmg template <> hipblasStatus_t hipblasRotmg( hipblasHandle_t handle, float* d1, float* d2, float* x1, const float* y1, float* param) { return hipblasSrotmgFortran(handle, d1, d2, x1, y1, param); } template <> hipblasStatus_t hipblasRotmg( hipblasHandle_t handle, double* d1, double* d2, double* x1, const double* y1, double* param) { return hipblasDrotmgFortran(handle, d1, d2, x1, y1, param); } // rotmg_batched template <> hipblasStatus_t hipblasRotmgBatched(hipblasHandle_t handle, float* const d1[], float* const d2[], float* const x1[], const float* const y1[], float* const param[], int batch_count) { return hipblasSrotmgBatchedFortran(handle, d1, d2, x1, y1, param, batch_count); } template <> hipblasStatus_t hipblasRotmgBatched(hipblasHandle_t handle, double* const d1[], double* const d2[], double* const x1[], const double* const y1[], double* const param[], int batch_count) { return hipblasDrotmgBatchedFortran(handle, d1, d2, x1, y1, param, batch_count); } // rotmg_strided_batched template <> hipblasStatus_t hipblasRotmgStridedBatched(hipblasHandle_t handle, float* d1, hipblasStride stride_d1, float* d2, hipblasStride stride_d2, float* x1, hipblasStride stride_x1, const float* y1, hipblasStride stride_y1, float* param, hipblasStride strideparam, int batch_count) { return hipblasSrotmgStridedBatchedFortran(handle, d1, stride_d1, d2, stride_d2, x1, stride_x1, y1, stride_y1, param, strideparam, batch_count); } template <> hipblasStatus_t hipblasRotmgStridedBatched(hipblasHandle_t handle, double* d1, hipblasStride stride_d1, double* d2, hipblasStride stride_d2, double* x1, hipblasStride stride_x1, const double* y1, hipblasStride stride_y1, double* param, hipblasStride strideparam, int batch_count) { return hipblasDrotmgStridedBatchedFortran(handle, d1, stride_d1, d2, stride_d2, x1, stride_x1, y1, stride_y1, param, strideparam, batch_count); } // amax template <> hipblasStatus_t hipblasIamax(hipblasHandle_t handle, int n, const float* x, int incx, int* result) { return hipblasIsamaxFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamax( hipblasHandle_t handle, int n, const double* x, int incx, int* result) { return hipblasIdamaxFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamax( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result) { return hipblasIcamaxFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamax( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result) { return hipblasIzamaxFortran(handle, n, x, incx, result); } // amax_batched template <> hipblasStatus_t hipblasIamaxBatched( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, int* result) { return hipblasIsamaxBatchedFortran(handle, n, x, incx, batch_count, 
result); } template <> hipblasStatus_t hipblasIamaxBatched( hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, int* result) { return hipblasIdamaxBatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIamaxBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, int* result) { return hipblasIcamaxBatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIamaxBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, int* result) { return hipblasIzamaxBatchedFortran(handle, n, x, incx, batch_count, result); } // amax_strided_batched template <> hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIsamaxStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIdamaxStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIcamaxStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIzamaxStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } // amin template <> hipblasStatus_t hipblasIamin(hipblasHandle_t handle, int n, const float* x, int incx, int* result) { return hipblasIsaminFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamin( hipblasHandle_t handle, int n, const double* x, int incx, int* result) { return hipblasIdaminFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamin( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result) { return hipblasIcaminFortran(handle, n, x, incx, result); } template <> hipblasStatus_t hipblasIamin( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result) { return hipblasIzaminFortran(handle, n, x, incx, result); } // amin_batched template <> hipblasStatus_t hipblasIaminBatched( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, int* result) { return hipblasIsaminBatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIaminBatched( hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, int* result) { return hipblasIdaminBatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIaminBatched(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, int* result) { return hipblasIcaminBatchedFortran(handle, n, x, incx, batch_count, result); } template <> hipblasStatus_t hipblasIaminBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, int* result) { return hipblasIzaminBatchedFortran(handle, n, x, incx, batch_count, result); } // amin_strided_batched template <> hipblasStatus_t 
hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIsaminStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIdaminStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIcaminStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } template <> hipblasStatus_t hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, int* result) { return hipblasIzaminStridedBatchedFortran(handle, n, x, incx, stridex, batch_count, result); } /* * =========================================================================== * level 2 BLAS * =========================================================================== */ // gbmv template <> hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSgbmvFortran(handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDgbmvFortran(handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasCgbmvFortran(handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZgbmvFortran(handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } // gbmv_batched template <> hipblasStatus_t hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* const y[], int incy, int batch_count) { return hipblasSgbmvBatchedFortran( handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* const y[], int incy, int batch_count) { return hipblasDgbmvBatchedFortran( handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t 
hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count) { return hipblasCgbmvBatchedFortran( handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZgbmvBatchedFortran( handle, transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy, batch_count); } // gbmv_strided_batched template <> hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, hipblasStride stride_a, const float* x, int incx, hipblasStride stride_x, const float* beta, float* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasSgbmvStridedBatchedFortran(handle, transA, m, n, kl, ku, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } template <> hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, hipblasStride stride_a, const double* x, int incx, hipblasStride stride_x, const double* beta, double* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasDgbmvStridedBatchedFortran(handle, transA, m, n, kl, ku, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } template <> hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride stride_a, const hipblasComplex* x, int incx, hipblasStride stride_x, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasCgbmvStridedBatchedFortran(handle, transA, m, n, kl, ku, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } template <> hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasZgbmvStridedBatchedFortran(handle, transA, m, n, kl, ku, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } // gemv template <> hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSgemvFortran(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy) { 
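// NOTE (added commentary, not part of the original source): this
// specialization, like the float and complex counterparts around it, simply
// forwards its arguments unchanged to the Fortran binding (here
// hipblasDgemvFortran). A minimal sketch of how a type-generic caller would
// reach one of these specializations, assuming the default host pointer mode
// and device buffers dA/dx/dy in column-major layout:
//
//     double alpha = 1.0, beta = 0.0;
//     hipblasGemv(handle, HIPBLAS_OP_N, m, n, &alpha, dA, lda,
//                 dx, 1, &beta, dy, 1); // T is deduced as double
//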
return hipblasDgemvFortran(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasCgemvFortran(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZgemvFortran(handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } // gemv_batched template <> hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* const y[], int incy, int batch_count) { return hipblasSgemvBatchedFortran( handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* const y[], int incy, int batch_count) { return hipblasDgemvBatchedFortran( handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count) { return hipblasCgemvBatchedFortran( handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZgemvBatchedFortran( handle, transA, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } // gemv_strided_batched template <> hipblasStatus_t hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batch_count) { return hipblasSgemvStridedBatchedFortran(handle, transA, m, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batch_count) { return hipblasDgemvStridedBatchedFortran(handle, transA, m, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batch_count); } template <> hipblasStatus_t hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const 
hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasCgemvStridedBatchedFortran(handle, transA, m, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batch_count); }
template <> hipblasStatus_t hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count) { return hipblasZgemvStridedBatchedFortran(handle, transA, m, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batch_count); }
// ger
// NOTE: the complex geru/gerc wrappers share an identical C++ parameter list, so the two
// specializations can only coexist if they are distinguished by an explicit template argument.
// The explicit <T, CONJ> arguments below assume a hipblasGer<T, CONJ>-style primary template
// (CONJ = true selects the conjugated gerc path), as used by the hipBLAS client wrappers.
template <> hipblasStatus_t hipblasGer<float, false>(hipblasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda) { return hipblasSgerFortran(handle, m, n, alpha, x, incx, y, incy, A, lda); }
template <> hipblasStatus_t hipblasGer<double, false>(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda) { return hipblasDgerFortran(handle, m, n, alpha, x, incx, y, incy, A, lda); }
template <> hipblasStatus_t hipblasGer<hipblasComplex, false>(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCgeruFortran(handle, m, n, alpha, x, incx, y, incy, A, lda); }
template <> hipblasStatus_t hipblasGer<hipblasComplex, true>(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCgercFortran(handle, m, n, alpha, x, incx, y, incy, A, lda); }
template <> hipblasStatus_t hipblasGer<hipblasDoubleComplex, false>(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZgeruFortran(handle, m, n, alpha, x, incx, y, incy, A, lda); }
template <> hipblasStatus_t hipblasGer<hipblasDoubleComplex, true>(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZgercFortran(handle, m, n, alpha, x, incx, y, incy, A, lda); }
// ger_batched
template <> hipblasStatus_t hipblasGerBatched<float, false>(hipblasHandle_t handle, int m, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const A[], int lda, int batch_count) { return hipblasSgerBatchedFortran(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); }
template <> hipblasStatus_t hipblasGerBatched<double, false>(hipblasHandle_t handle, int m, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const A[], int lda, int batch_count) { return hipblasDgerBatchedFortran(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); }
template <> hipblasStatus_t hipblasGerBatched<hipblasComplex, false>(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCgeruBatchedFortran(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); }
template <> hipblasStatus_t hipblasGerBatched<hipblasComplex, true>(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCgercBatchedFortran(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); }
template <> hipblasStatus_t hipblasGerBatched<hipblasDoubleComplex, false>(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZgeruBatchedFortran(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); }
template <> hipblasStatus_t hipblasGerBatched<hipblasDoubleComplex, true>(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZgercBatchedFortran(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count); }
// ger_strided_batched
template <> hipblasStatus_t hipblasGerStridedBatched<float, false>(hipblasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* A, int lda, hipblasStride strideA, int batch_count) { return hipblasSgerStridedBatchedFortran( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); }
template <> hipblasStatus_t hipblasGerStridedBatched<double, false>(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* A, int lda, hipblasStride strideA, int batch_count) { return hipblasDgerStridedBatchedFortran( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); }
template <> hipblasStatus_t hipblasGerStridedBatched<hipblasComplex, false>(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCgeruStridedBatchedFortran( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); }
template <> hipblasStatus_t hipblasGerStridedBatched<hipblasComplex, true>(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCgercStridedBatchedFortran( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); }
template <> hipblasStatus_t hipblasGerStridedBatched<hipblasDoubleComplex, false>(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZgeruStridedBatchedFortran( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); }
template <> hipblasStatus_t hipblasGerStridedBatched<hipblasDoubleComplex, true>(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy,
hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZgercStridedBatchedFortran( handle, m, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batch_count); } // hbmv template <> hipblasStatus_t hipblasHbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasChbmvFortran(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasHbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZhbmvFortran(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } // hbmv_batched template <> hipblasStatus_t hipblasHbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batchCount) { return hipblasChbmvBatchedFortran( handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasHbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount) { return hipblasZhbmvBatchedFortran( handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy, batchCount); } // hbmv_strided_batched template <> hipblasStatus_t hipblasHbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasChbmvStridedBatchedFortran(handle, uplo, n, k, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasHbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasZhbmvStridedBatchedFortran(handle, uplo, n, k, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } // hemv template <> hipblasStatus_t hipblasHemv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasChemvFortran(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasHemv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return 
hipblasZhemvFortran(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } // hemv_batched template <> hipblasStatus_t hipblasHemvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count) { return hipblasChemvBatchedFortran( handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } template <> hipblasStatus_t hipblasHemvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZhemvBatchedFortran( handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batch_count); } // hemv_strided_batched template <> hipblasStatus_t hipblasHemvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride stride_a, const hipblasComplex* x, int incx, hipblasStride stride_x, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasChemvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } template <> hipblasStatus_t hipblasHemvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasZhemvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_count); } // her template <> hipblasStatus_t hipblasHer(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasComplex* A, int lda) { return hipblasCherFortran(handle, uplo, n, alpha, x, incx, A, lda); } template <> hipblasStatus_t hipblasHer(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda) { return hipblasZherFortran(handle, uplo, n, alpha, x, incx, A, lda); } // her_batched template <> hipblasStatus_t hipblasHerBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const A[], int lda, int batchCount) { return hipblasCherBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batchCount); } template <> hipblasStatus_t hipblasHerBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const A[], int lda, int batchCount) { return hipblasZherBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batchCount); } // her_strided_batched template <> hipblasStatus_t hipblasHerStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasCherStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batchCount); } template <> 
hipblasStatus_t hipblasHerStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasZherStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batchCount); } // her2 template <> hipblasStatus_t hipblasHer2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCher2Fortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasHer2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZher2Fortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } // her2_batched template <> hipblasStatus_t hipblasHer2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batchCount) { return hipblasCher2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } template <> hipblasStatus_t hipblasHer2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batchCount) { return hipblasZher2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } // her2_strided_batched template <> hipblasStatus_t hipblasHer2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasCher2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasHer2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasZher2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } // hpmv template <> hipblasStatus_t hipblasHpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasChpmvFortran(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasHpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZhpmvFortran(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); } // hpmv_batched template <> hipblasStatus_t hipblasHpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, 
int n, const hipblasComplex* alpha, const hipblasComplex* const AP[], const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batchCount) { return hipblasChpmvBatchedFortran( handle, uplo, n, alpha, AP, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasHpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount) { return hipblasZhpmvBatchedFortran( handle, uplo, n, alpha, AP, x, incx, beta, y, incy, batchCount); } // hpmv_strided_batched template <> hipblasStatus_t hipblasHpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, hipblasStride strideAP, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasChpmvStridedBatchedFortran( handle, uplo, n, alpha, AP, strideAP, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasHpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, hipblasStride strideAP, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasZhpmvStridedBatchedFortran( handle, uplo, n, alpha, AP, strideAP, x, incx, stridex, beta, y, incy, stridey, batchCount); } // hpr template <> hipblasStatus_t hipblasHpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP) { return hipblasChprFortran(handle, uplo, n, alpha, x, incx, AP); } template <> hipblasStatus_t hipblasHpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP) { return hipblasZhprFortran(handle, uplo, n, alpha, x, incx, AP); } // hpr_batched template <> hipblasStatus_t hipblasHprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const AP[], int batchCount) { return hipblasChprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batchCount); } template <> hipblasStatus_t hipblasHprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const AP[], int batchCount) { return hipblasZhprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batchCount); } // hpr_strided_batched template <> hipblasStatus_t hipblasHprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasChprStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasHprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasZhprStridedBatchedFortran( handle, uplo, n, 
alpha, x, incx, stridex, AP, strideAP, batchCount); } // hpr2 template <> hipblasStatus_t hipblasHpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* AP) { return hipblasChpr2Fortran(handle, uplo, n, alpha, x, incx, y, incy, AP); } template <> hipblasStatus_t hipblasHpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP) { return hipblasZhpr2Fortran(handle, uplo, n, alpha, x, incx, y, incy, AP); } // hpr2_batched template <> hipblasStatus_t hipblasHpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const AP[], int batchCount) { return hipblasChpr2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, AP, batchCount); } template <> hipblasStatus_t hipblasHpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const AP[], int batchCount) { return hipblasZhpr2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, AP, batchCount); } // hpr2_strided_batched template <> hipblasStatus_t hipblasHpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasChpr2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasHpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasZhpr2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, AP, strideAP, batchCount); } // sbmv template <> hipblasStatus_t hipblasSbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSsbmvFortran(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDsbmvFortran(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } // sbmv_batched template <> hipblasStatus_t hipblasSbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount) { return hipblasSsbmvBatchedFortran( handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* const A[], int lda, const double* const 
x[], int incx, const double* beta, double* y[], int incy, int batchCount) { return hipblasDsbmvBatchedFortran( handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy, batchCount); } // sbmv_strided_batched template <> hipblasStatus_t hipblasSbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount) { return hipblasSsbmvStridedBatchedFortran(handle, uplo, n, k, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount) { return hipblasDsbmvStridedBatchedFortran(handle, uplo, n, k, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } // spmv template <> hipblasStatus_t hipblasSpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* AP, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSspmvFortran(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* AP, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDspmvFortran(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); } // spmv_batched template <> hipblasStatus_t hipblasSpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const AP[], const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount) { return hipblasSspmvBatchedFortran( handle, uplo, n, alpha, AP, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const AP[], const double* const x[], int incx, const double* beta, double* y[], int incy, int batchCount) { return hipblasDspmvBatchedFortran( handle, uplo, n, alpha, AP, x, incx, beta, y, incy, batchCount); } // spmv_strided_batched template <> hipblasStatus_t hipblasSpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* AP, hipblasStride strideAP, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount) { return hipblasSspmvStridedBatchedFortran( handle, uplo, n, alpha, AP, strideAP, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* AP, hipblasStride strideAP, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount) { return hipblasDspmvStridedBatchedFortran( handle, uplo, n, alpha, AP, strideAP, x, incx, stridex, beta, y, incy, stridey, batchCount); } // spr template <> hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* AP) { return hipblasSsprFortran(handle, uplo, n, alpha, x, incx, AP); } template <> 
hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* AP) { return hipblasDsprFortran(handle, uplo, n, alpha, x, incx, AP); } template <> hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP) { return hipblasCsprFortran(handle, uplo, n, alpha, x, incx, AP); } template <> hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP) { return hipblasZsprFortran(handle, uplo, n, alpha, x, incx, AP); } // spr_batched template <> hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, float* const AP[], int batchCount) { return hipblasSsprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batchCount); } template <> hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, double* const AP[], int batchCount) { return hipblasDsprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batchCount); } template <> hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const AP[], int batchCount) { return hipblasCsprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batchCount); } template <> hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const AP[], int batchCount) { return hipblasZsprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batchCount); } // spr_strided_batched template <> hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, float* AP, hipblasStride strideAP, int batchCount) { return hipblasSsprStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, double* AP, hipblasStride strideAP, int batchCount) { return hipblasDsprStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasCsprStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasZsprStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, AP, strideAP, batchCount); } // spr2 template <> hipblasStatus_t hipblasSpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int 
incy, float* AP) { return hipblasSspr2Fortran(handle, uplo, n, alpha, x, incx, y, incy, AP); } template <> hipblasStatus_t hipblasSpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP) { return hipblasDspr2Fortran(handle, uplo, n, alpha, x, incx, y, incy, AP); } // spr2_batched template <> hipblasStatus_t hipblasSpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const AP[], int batchCount) { return hipblasSspr2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, AP, batchCount); } template <> hipblasStatus_t hipblasSpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const AP[], int batchCount) { return hipblasDspr2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, AP, batchCount); } // spr2_strided_batched template <> hipblasStatus_t hipblasSpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* AP, hipblasStride strideAP, int batchCount) { return hipblasSspr2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, AP, strideAP, batchCount); } template <> hipblasStatus_t hipblasSpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* AP, hipblasStride strideAP, int batchCount) { return hipblasDspr2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, AP, strideAP, batchCount); } // symv template <> hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy) { return hipblasSsymvFortran(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy) { return hipblasDsymvFortran(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasCsymvFortran(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZsymvFortran(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); } // symv_batched template <> hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount) { return hipblasSsymvBatchedFortran( handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template 
<> hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* y[], int incy, int batchCount) { return hipblasDsymvBatchedFortran( handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* y[], int incy, int batchCount) { return hipblasCsymvBatchedFortran( handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batchCount); } template <> hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y[], int incy, int batchCount) { return hipblasZsymvBatchedFortran( handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy, batchCount); } // symv_strided_batched template <> hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount) { return hipblasSsymvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount) { return hipblasDsymvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasCsymvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } template <> hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasZsymvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, strideA, x, incx, stridex, beta, y, incy, stridey, batchCount); } // syr template <> hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* A, int lda) { return hipblasSsyrFortran(handle, uplo, n, alpha, x, incx, A, lda); } template <> hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* A, int lda) { return hipblasDsyrFortran(handle, uplo, n, 
alpha, x, incx, A, lda); } template <> hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* A, int lda) { return hipblasCsyrFortran(handle, uplo, n, alpha, x, incx, A, lda); } template <> hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda) { return hipblasZsyrFortran(handle, uplo, n, alpha, x, incx, A, lda); } // syr_batched template <> hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, float* const A[], int lda, int batch_count) { return hipblasSsyrBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batch_count); } template <> hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, double* const A[], int lda, int batch_count) { return hipblasDsyrBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batch_count); } template <> hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCsyrBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batch_count); } template <> hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZsyrBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batch_count); } // syr_strided_batched template <> hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, float* A, int lda, hipblasStride strideA, int batch_count) { return hipblasSsyrStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, double* A, int lda, hipblasStride strideA, int batch_count) { return hipblasDsyrStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCsyrStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batch_count); } template <> hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZsyrStridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, A, lda, strideA, batch_count); } // syr2 template <> hipblasStatus_t hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda) { return 
hipblasSsyr2Fortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda) { return hipblasDsyr2Fortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCsyr2Fortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } template <> hipblasStatus_t hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZsyr2Fortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda); } // syr2_batched template <> hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const A[], int lda, int batchCount) { return hipblasSsyr2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } template <> hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const A[], int lda, int batchCount) { return hipblasDsyr2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } template <> hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batchCount) { return hipblasCsyr2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } template <> hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batchCount) { return hipblasZsyr2BatchedFortran(handle, uplo, n, alpha, x, incx, y, incy, A, lda, batchCount); } // syr2_strided_batched template <> hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* A, int lda, hipblasStride strideA, int batchCount) { return hipblasSsyr2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* A, int lda, hipblasStride strideA, int batchCount) { return hipblasDsyr2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, 
hipblasComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasCsyr2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } template <> hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasZsyr2StridedBatchedFortran( handle, uplo, n, alpha, x, incx, stridex, y, incy, stridey, A, lda, strideA, batchCount); } // trsv template <> hipblasStatus_t hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, float* x, int incx) { return hipblasStrsvFortran(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, double* x, int incx) { return hipblasDtrsvFortran(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtrsvFortran(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtrsvFortran(handle, uplo, transA, diag, m, A, lda, x, incx); } // trsv_batched template <> hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const A[], int lda, float* const x[], int incx, int batch_count) { return hipblasStrsvBatchedFortran(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const A[], int lda, double* const x[], int incx, int batch_count) { return hipblasDtrsvBatchedFortran(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCtrsvBatchedFortran(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZtrsvBatchedFortran(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } // trsv_strided_batched template <> hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, hipblasStride strideA, float* x, int incx, hipblasStride stridex, int batch_count) { return hipblasStrsvStridedBatchedFortran( 
handle, uplo, transA, diag, m, A, lda, strideA, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, hipblasStride strideA, double* x, int incx, hipblasStride stridex, int batch_count) { return hipblasDtrsvStridedBatchedFortran( handle, uplo, transA, diag, m, A, lda, strideA, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasCtrsvStridedBatchedFortran( handle, uplo, transA, diag, m, A, lda, strideA, x, incx, stridex, batch_count); } template <> hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count) { return hipblasZtrsvStridedBatchedFortran( handle, uplo, transA, diag, m, A, lda, strideA, x, incx, stridex, batch_count); } // tbmv template <> hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, float* x, int incx) { return hipblasStbmvFortran(handle, uplo, transA, diag, m, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, double* x, int incx) { return hipblasDtbmvFortran(handle, uplo, transA, diag, m, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtbmvFortran(handle, uplo, transA, diag, m, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtbmvFortran(handle, uplo, transA, diag, m, k, A, lda, x, incx); } // tbmv_batched template <> hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* const A[], int lda, float* const x[], int incx, int batch_count) { return hipblasStbmvBatchedFortran( handle, uplo, transA, diag, m, k, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* const A[], int lda, double* const x[], int incx, int batch_count) { return hipblasDtbmvBatchedFortran( handle, uplo, transA, diag, m, k, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCtbmvBatchedFortran( handle, uplo, transA, diag, m, k, A, lda, x, incx, 
batch_count); } template <> hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZtbmvBatchedFortran( handle, uplo, transA, diag, m, k, A, lda, x, incx, batch_count); } // tbmv_strided_batched template <> hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, hipblasStride stride_a, float* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasStbmvStridedBatchedFortran( handle, uplo, transA, diag, m, k, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, hipblasStride stride_a, double* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasDtbmvStridedBatchedFortran( handle, uplo, transA, diag, m, k, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasStride stride_a, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasCtbmvStridedBatchedFortran( handle, uplo, transA, diag, m, k, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasZtbmvStridedBatchedFortran( handle, uplo, transA, diag, m, k, A, lda, stride_a, x, incx, stride_x, batch_count); } // tbsv template <> hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float* A, int lda, float* x, int incx) { return hipblasStbsvFortran(handle, uplo, transA, diag, n, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double* A, int lda, double* x, int incx) { return hipblasDtbsvFortran(handle, uplo, transA, diag, n, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtbsvFortran(handle, uplo, transA, diag, n, k, A, lda, x, incx); } template <> hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtbsvFortran(handle, uplo, transA, diag, n, k, A, lda, x, incx); } // tbsv_batched template <> hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float* const A[], int lda, float* const x[], int incx, int batchCount) { return 
hipblasStbsvBatchedFortran( handle, uplo, transA, diag, n, k, A, lda, x, incx, batchCount); } template <> hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double* const A[], int lda, double* const x[], int incx, int batchCount) { return hipblasDtbsvBatchedFortran( handle, uplo, transA, diag, n, k, A, lda, x, incx, batchCount); } template <> hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtbsvBatchedFortran( handle, uplo, transA, diag, n, k, A, lda, x, incx, batchCount); } template <> hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batchCount) { return hipblasZtbsvBatchedFortran( handle, uplo, transA, diag, n, k, A, lda, x, incx, batchCount); } // tbsv_strided_batched template <> hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float* A, int lda, hipblasStride strideA, float* x, int incx, hipblasStride stridex, int batchCount) { return hipblasStbsvStridedBatchedFortran( handle, uplo, transA, diag, n, k, A, lda, strideA, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double* A, int lda, hipblasStride strideA, double* x, int incx, hipblasStride stridex, int batchCount) { return hipblasDtbsvStridedBatchedFortran( handle, uplo, transA, diag, n, k, A, lda, strideA, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasCtbsvStridedBatchedFortran( handle, uplo, transA, diag, n, k, A, lda, strideA, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasZtbsvStridedBatchedFortran( handle, uplo, transA, diag, n, k, A, lda, strideA, x, incx, stridex, batchCount); } // tpmv template <> hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, float* x, int incx) { return hipblasStpmvFortran(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, double* x, int incx) { return hipblasDtpmvFortran(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, 
hipblasComplex* x, int incx) { return hipblasCtpmvFortran(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx) { return hipblasZtpmvFortran(handle, uplo, transA, diag, m, AP, x, incx); } // tpmv_batched template <> hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const AP[], float* const x[], int incx, int batchCount) { return hipblasStpmvBatchedFortran(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const AP[], double* const x[], int incx, int batchCount) { return hipblasDtpmvBatchedFortran(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const AP[], hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtpmvBatchedFortran(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const AP[], hipblasDoubleComplex* const x[], int incx, int batchCount) { return hipblasZtpmvBatchedFortran(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } // tpmv_strided_batched template <> hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, hipblasStride strideAP, float* x, int incx, hipblasStride stridex, int batchCount) { return hipblasStpmvStridedBatchedFortran( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, hipblasStride strideAP, double* x, int incx, hipblasStride stridex, int batchCount) { return hipblasDtpmvStridedBatchedFortran( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasStride strideAP, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasCtpmvStridedBatchedFortran( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasStride strideAP, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasZtpmvStridedBatchedFortran( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } // tpsv template <> hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, float* x, int incx) { return 
hipblasStpsvFortran(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, double* x, int incx) { return hipblasDtpsvFortran(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasComplex* x, int incx) { return hipblasCtpsvFortran(handle, uplo, transA, diag, m, AP, x, incx); } template <> hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx) { return hipblasZtpsvFortran(handle, uplo, transA, diag, m, AP, x, incx); } // tpsv_batched template <> hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const AP[], float* const x[], int incx, int batchCount) { return hipblasStpsvBatchedFortran(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const AP[], double* const x[], int incx, int batchCount) { return hipblasDtpsvBatchedFortran(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const AP[], hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtpsvBatchedFortran(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } template <> hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const AP[], hipblasDoubleComplex* const x[], int incx, int batchCount) { return hipblasZtpsvBatchedFortran(handle, uplo, transA, diag, m, AP, x, incx, batchCount); } // tpsv_strided_batched template <> hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, hipblasStride strideAP, float* x, int incx, hipblasStride stridex, int batchCount) { return hipblasStpsvStridedBatchedFortran( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, hipblasStride strideAP, double* x, int incx, hipblasStride stridex, int batchCount) { return hipblasDtpsvStridedBatchedFortran( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasStride strideAP, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasCtpsvStridedBatchedFortran( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } template <> hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, 
hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasStride strideAP, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount) { return hipblasZtpsvStridedBatchedFortran( handle, uplo, transA, diag, m, AP, strideAP, x, incx, stridex, batchCount); } // trmv template <> hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, float* x, int incx) { return hipblasStrmvFortran(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, double* x, int incx) { return hipblasDtrmvFortran(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtrmvFortran(handle, uplo, transA, diag, m, A, lda, x, incx); } template <> hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtrmvFortran(handle, uplo, transA, diag, m, A, lda, x, incx); } // trmv_batched template <> hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const A[], int lda, float* const x[], int incx, int batch_count) { return hipblasStrmvBatchedFortran(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const A[], int lda, double* const x[], int incx, int batch_count) { return hipblasDtrmvBatchedFortran(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCtrmvBatchedFortran(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } template <> hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZtrmvBatchedFortran(handle, uplo, transA, diag, m, A, lda, x, incx, batch_count); } // trmv_strided_batched template <> hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, hipblasStride stride_a, float* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasStrmvStridedBatchedFortran( handle, uplo, transA, diag, m, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, hipblasStride stride_a, double* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasDtrmvStridedBatchedFortran( 
handle, uplo, transA, diag, m, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasStride stride_a, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasCtrmvStridedBatchedFortran( handle, uplo, transA, diag, m, A, lda, stride_a, x, incx, stride_x, batch_count); } template <> hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasZtrmvStridedBatchedFortran( handle, uplo, transA, diag, m, A, lda, stride_a, x, incx, stride_x, batch_count); } // /* // * =========================================================================== // * level 3 BLAS // * =========================================================================== // */ // trtri template <> hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* A, int lda, float* invA, int ldinvA) { return hipblasStrtriFortran(handle, uplo, diag, n, A, lda, invA, ldinvA); } template <> hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* A, int lda, double* invA, int ldinvA) { return hipblasDtrtriFortran(handle, uplo, diag, n, A, lda, invA, ldinvA); } template <> hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* A, int lda, hipblasComplex* invA, int ldinvA) { return hipblasCtrtriFortran(handle, uplo, diag, n, A, lda, invA, ldinvA); } template <> hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* invA, int ldinvA) { return hipblasZtrtriFortran(handle, uplo, diag, n, A, lda, invA, ldinvA); } // trtri_batched template <> hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* A[], int lda, float* invA[], int ldinvA, int batch_count) { return hipblasStrtriBatchedFortran(handle, uplo, diag, n, A, lda, invA, ldinvA, batch_count); } template <> hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* A[], int lda, double* invA[], int ldinvA, int batch_count) { return hipblasDtrtriBatchedFortran(handle, uplo, diag, n, A, lda, invA, ldinvA, batch_count); } template <> hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* A[], int lda, hipblasComplex* invA[], int ldinvA, int batch_count) { return hipblasCtrtriBatchedFortran(handle, uplo, diag, n, A, lda, invA, ldinvA, batch_count); } template <> hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* A[], int lda, hipblasDoubleComplex* invA[], int ldinvA, int batch_count) { return hipblasZtrtriBatchedFortran(handle, uplo, diag, n, A, lda, invA, ldinvA, batch_count); } // trtri_strided_batched template <> hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* A, int lda, 
hipblasStride stride_A, float* invA, int ldinvA, hipblasStride stride_invA, int batch_count) { return hipblasStrtriStridedBatchedFortran( handle, uplo, diag, n, A, lda, stride_A, invA, ldinvA, stride_invA, batch_count); } template <> hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* A, int lda, hipblasStride stride_A, double* invA, int ldinvA, hipblasStride stride_invA, int batch_count) { return hipblasDtrtriStridedBatchedFortran( handle, uplo, diag, n, A, lda, stride_A, invA, ldinvA, stride_invA, batch_count); } template <> hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* A, int lda, hipblasStride stride_A, hipblasComplex* invA, int ldinvA, hipblasStride stride_invA, int batch_count) { return hipblasCtrtriStridedBatchedFortran( handle, uplo, diag, n, A, lda, stride_A, invA, ldinvA, stride_invA, batch_count); } template <> hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* A, int lda, hipblasStride stride_A, hipblasDoubleComplex* invA, int ldinvA, hipblasStride stride_invA, int batch_count) { return hipblasZtrtriStridedBatchedFortran( handle, uplo, diag, n, A, lda, stride_A, invA, ldinvA, stride_invA, batch_count); } // dgmm template <> hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc) { return hipblasSdgmmFortran(handle, side, m, n, A, lda, x, incx, C, ldc); } template <> hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* A, int lda, const double* x, int incx, double* C, int ldc) { return hipblasDdgmmFortran(handle, side, m, n, A, lda, x, incx, C, ldc); } template <> hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, hipblasComplex* C, int ldc) { return hipblasCdgmmFortran(handle, side, m, n, A, lda, x, incx, C, ldc); } template <> hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* C, int ldc) { return hipblasZdgmmFortran(handle, side, m, n, A, lda, x, incx, C, ldc); } // dgmm_batched template <> hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* const A[], int lda, const float* const x[], int incx, float* const C[], int ldc, int batch_count) { return hipblasSdgmmBatchedFortran(handle, side, m, n, A, lda, x, incx, C, ldc, batch_count); } template <> hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* const A[], int lda, const double* const x[], int incx, double* const C[], int ldc, int batch_count) { return hipblasDdgmmBatchedFortran(handle, side, m, n, A, lda, x, incx, C, ldc, batch_count); } template <> hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, hipblasComplex* const C[], int ldc, int batch_count) { return hipblasCdgmmBatchedFortran(handle, side, m, n, A, lda, x, incx, C, ldc, batch_count); } template <> hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, 
hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const C[], int ldc, int batch_count) { return hipblasZdgmmBatchedFortran(handle, side, m, n, A, lda, x, incx, C, ldc, batch_count); } // dgmm_strided_batched template <> hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* A, int lda, hipblasStride stride_A, const float* x, int incx, hipblasStride stride_x, float* C, int ldc, hipblasStride stride_C, int batch_count) { return hipblasSdgmmStridedBatchedFortran( handle, side, m, n, A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count); } template <> hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* A, int lda, hipblasStride stride_A, const double* x, int incx, hipblasStride stride_x, double* C, int ldc, hipblasStride stride_C, int batch_count) { return hipblasDdgmmStridedBatchedFortran( handle, side, m, n, A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count); } template <> hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* A, int lda, hipblasStride stride_A, const hipblasComplex* x, int incx, hipblasStride stride_x, hipblasComplex* C, int ldc, hipblasStride stride_C, int batch_count) { return hipblasCdgmmStridedBatchedFortran( handle, side, m, n, A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count); } template <> hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* A, int lda, hipblasStride stride_A, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, hipblasDoubleComplex* C, int ldc, hipblasStride stride_C, int batch_count) { return hipblasZdgmmStridedBatchedFortran( handle, side, m, n, A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count); } // gemm template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* A, int lda, const hipblasHalf* B, int ldb, const hipblasHalf* beta, hipblasHalf* C, int ldc) { return hipblasHgemmFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return hipblasSgemmFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return hipblasDgemmFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCgemmFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t 
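// The hipblasGemm<T> specializations above and below only forward their arguments to the
// corresponding *gemmFortran bindings, so templated test code can exercise the Fortran
// entry points. Minimal usage sketch (handle, dA, dB, dC and the scalar values are
// illustrative placeholders, not names defined in this file):
//
//     float alpha = 1.0f, beta = 0.0f;
//     hipblasStatus_t status = hipblasGemm(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, m, n, k,
//                                          &alpha, dA, lda, dB, ldb, &beta, dC, ldc);
//     // Through this dispatch layer the float overload resolves to hipblasSgemmFortran.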
hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZgemmFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // gemm_batched template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* const A[], int lda, const hipblasHalf* const B[], int ldb, const hipblasHalf* beta, hipblasHalf* const C[], int ldc, int batch_count) { return hipblasHgemmBatchedFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batch_count) { return hipblasSgemmBatchedFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batch_count) { return hipblasDgemmBatchedFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batch_count) { return hipblasCgemmBatchedFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } template <> hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batch_count) { return hipblasZgemmBatchedFortran( handle, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batch_count); } // gemm_strided_batched template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* A, int lda, int bsa, const hipblasHalf* B, int ldb, int bsb, const hipblasHalf* beta, hipblasHalf* C, int ldc, int bsc, int batch_count) { return hipblasHgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* A, int lda, int bsa, const float* B, int ldb, int bsb, const float* beta, float* C, int ldc, int bsc, int batch_count) { return hipblasSgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, A, 
lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* A, int lda, int bsa, const double* B, int ldb, int bsb, const double* beta, double* C, int ldc, int bsc, int batch_count) { return hipblasDgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, int bsa, const hipblasComplex* B, int ldb, int bsb, const hipblasComplex* beta, hipblasComplex* C, int ldc, int bsc, int batch_count) { return hipblasCgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } template <> hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, int bsa, const hipblasDoubleComplex* B, int ldb, int bsb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, int bsc, int batch_count) { return hipblasZgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, A, lda, bsa, B, ldb, bsb, beta, C, ldc, bsc, batch_count); } // herk template <> hipblasStatus_t hipblasHerk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* A, int lda, const float* beta, hipblasComplex* C, int ldc) { return hipblasCherkFortran(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } template <> hipblasStatus_t hipblasHerk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* A, int lda, const double* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZherkFortran(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } // herk_batched template <> hipblasStatus_t hipblasHerkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* const A[], int lda, const float* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCherkBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasHerkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* const A[], int lda, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZherkBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } // herk_strided_batched template <> hipblasStatus_t hipblasHerkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCherkStridedBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasHerkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, 
hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZherkStridedBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } // her2k template <> hipblasStatus_t hipblasHer2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const float* beta, hipblasComplex* C, int ldc) { return hipblasCher2kFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasHer2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const double* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZher2kFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // her2k_batched template <> hipblasStatus_t hipblasHer2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const float* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCher2kBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasHer2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZher2kBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // her2k_strided_batched template <> hipblasStatus_t hipblasHer2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCher2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasHer2kStridedBatched( hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZher2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // herkx template <> hipblasStatus_t hipblasHerkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const float* beta, hipblasComplex* C, int ldc) { return hipblasCherkxFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> 
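// herkx forms C := alpha*op(A)*op(B)^H + beta*C with C Hermitian, beta real, and only the
// triangle selected by uplo referenced; the wrappers here simply pass the arguments through.
// Hedged usage sketch (handle, dA, dB, dC and the scalar values are illustrative placeholders):
//
//     hipblasComplex alpha(1.0f, 0.0f);
//     float          beta = 0.0f; // beta is real for herkx
//     hipblasStatus_t status = hipblasHerkx(handle, HIPBLAS_FILL_MODE_UPPER, HIPBLAS_OP_N,
//                                           n, k, &alpha, dA, lda, dB, ldb, &beta, dC, ldc);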
hipblasStatus_t hipblasHerkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const double* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZherkxFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // herkx_batched template <> hipblasStatus_t hipblasHerkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const float* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCherkxBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasHerkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZherkxBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // herkx_strided_batched template <> hipblasStatus_t hipblasHerkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCherkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasHerkxStridedBatched( hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZherkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // symm template <> hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return hipblasSsymmFortran(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return hipblasDsymmFortran(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsymmFortran(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t 
uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsymmFortran(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } // symm_batched template <> hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount) { return hipblasSsymmBatchedFortran( handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount) { return hipblasDsymmBatchedFortran( handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsymmBatchedFortran( handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsymmBatchedFortran( handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // symm_strided_batched template <> hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSsymmStridedBatchedFortran(handle, side, uplo, m, n, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDsymmStridedBatchedFortran(handle, side, uplo, m, n, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsymmStridedBatchedFortran(handle, side, uplo, m, n, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> 
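// The *StridedBatched wrappers take one base pointer per matrix plus a hipblasStride giving
// the element offset between consecutive problem instances. Sketch of a packed layout for a
// float symm batch (handle, dA, dB, dC, alpha, beta and the sizes are illustrative placeholders):
//
//     hipblasStride strideA = hipblasStride(lda) * m; // A is m x m for HIPBLAS_SIDE_LEFT
//     hipblasStride strideB = hipblasStride(ldb) * n; // B and C are m x n
//     hipblasStride strideC = hipblasStride(ldc) * n;
//     hipblasStatus_t status = hipblasSymmStridedBatched(handle, HIPBLAS_SIDE_LEFT,
//                                                        HIPBLAS_FILL_MODE_UPPER, m, n,
//                                                        &alpha, dA, lda, strideA,
//                                                        dB, ldb, strideB,
//                                                        &beta, dC, ldc, strideC, batchCount);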
hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsymmStridedBatchedFortran(handle, side, uplo, m, n, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // syrk template <> hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc) { return hipblasSsyrkFortran(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* beta, double* C, int ldc) { return hipblasDsyrkFortran(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyrkFortran(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyrkFortran(handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc); } // syrk_batched template <> hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* beta, float* const C[], int ldc, int batchCount) { return hipblasSsyrkBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* beta, double* const C[], int ldc, int batchCount) { return hipblasDsyrkBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyrkBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyrkBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, beta, C, ldc, batchCount); } // syrk_strided_batched template <> hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, 
hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSsyrkStridedBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDsyrkStridedBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyrkStridedBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyrkStridedBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, strideA, beta, C, ldc, strideC, batchCount); } // syr2k template <> hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return hipblasSsyr2kFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return hipblasDsyr2kFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyr2kFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyr2kFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // syr2k_batched template <> hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount) { return hipblasSsyr2kBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, 
beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount) { return hipblasDsyr2kBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyr2kBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyr2kBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // syr2k_strided_batched template <> hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSsyr2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDsyr2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyr2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyr2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // syrkx template <> hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, 
hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return hipblasSsyrkxFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return hipblasDsyrkxFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyrkxFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyrkxFortran(handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // syrkx_batched template <> hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount) { return hipblasSsyrkxBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount) { return hipblasDsyrkxBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyrkxBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyrkxBatchedFortran( handle, uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // syrkx_strided_batched template <> hipblasStatus_t hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount) { return 
hipblasSsyrkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDsyrkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyrkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyrkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // hemm template <> hipblasStatus_t hipblasHemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasChemmFortran(handle, side, uplo, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> hipblasStatus_t hipblasHemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZhemmFortran(handle, side, uplo, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // hemm_batched template <> hipblasStatus_t hipblasHemmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasChemmBatchedFortran( handle, side, uplo, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } template <> hipblasStatus_t hipblasHemmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZhemmBatchedFortran( handle, side, uplo, n, k, alpha, A, lda, B, ldb, beta, C, ldc, batchCount); } // hemm_strided_batched template <> hipblasStatus_t hipblasHemmStridedBatched(hipblasHandle_t handle, 
hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasChemmStridedBatchedFortran(handle, side, uplo, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasHemmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZhemmStridedBatchedFortran(handle, side, uplo, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount); } // trmm template <> hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, float* B, int ldb) { return hipblasStrmmFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, double* B, int ldb) { return hipblasDtrmmFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasComplex* B, int ldb) { return hipblasCtrmmFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb) { return hipblasZtrmmFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } // trmm_batched template <> hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount) { return hipblasStrmmBatchedFortran( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batchCount); } template <> hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount) { return hipblasDtrmmBatchedFortran( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batchCount); } template <> hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, hipblasComplex* const B[], int ldb, int batchCount) { 
return hipblasCtrmmBatchedFortran( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batchCount); } template <> hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const B[], int ldb, int batchCount) { return hipblasZtrmmBatchedFortran( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batchCount); } // trmm_strided_batched template <> hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, float* B, int ldb, hipblasStride strideB, int batchCount) { return hipblasStrmmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batchCount); } template <> hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, double* B, int ldb, hipblasStride strideB, int batchCount) { return hipblasDtrmmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batchCount); } template <> hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* B, int ldb, hipblasStride strideB, int batchCount) { return hipblasCtrmmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batchCount); } template <> hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* B, int ldb, hipblasStride strideB, int batchCount) { return hipblasZtrmmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batchCount); } // trsm template <> hipblasStatus_t hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* A, int lda, float* B, int ldb) { return hipblasStrsmFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* A, int lda, double* B, int ldb) { return hipblasDtrsmFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb) { return hipblasCtrsmFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } template <> hipblasStatus_t 
hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb) { return hipblasZtrsmFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb); } // trsm_batched template <> hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const A[], int lda, float* B[], int ldb, int batch_count) { return hipblasStrsmBatchedFortran( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batch_count); } template <> hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const A[], int lda, double* B[], int ldb, int batch_count) { return hipblasDtrsmBatchedFortran( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batch_count); } template <> hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const A[], int lda, hipblasComplex* B[], int ldb, int batch_count) { return hipblasCtrsmBatchedFortran( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batch_count); } template <> hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* B[], int ldb, int batch_count) { return hipblasZtrsmBatchedFortran( handle, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb, batch_count); } // trsm_strided_batched template <> hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* A, int lda, hipblasStride strideA, float* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasStrsmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batch_count); } template <> hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* A, int lda, hipblasStride strideA, double* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasDtrsmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batch_count); } template <> hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasCtrsmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batch_count); } template <> hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, 
hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasZtrsmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, strideA, B, ldb, strideB, batch_count); } // geam template <> hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb, float* C, int ldc) { return hipblasSgeamFortran(handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } template <> hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc) { return hipblasDgeamFortran(handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } template <> hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* beta, const hipblasComplex* B, int ldb, hipblasComplex* C, int ldc) { return hipblasCgeamFortran(handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } template <> hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* C, int ldc) { return hipblasZgeamFortran(handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } // geam_batched template <> hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* const A[], int lda, const float* beta, const float* const B[], int ldb, float* const C[], int ldc, int batchCount) { return hipblasSgeamBatchedFortran( handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc, batchCount); } template <> hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double* alpha, const double* const A[], int lda, const double* beta, const double* const B[], int ldb, double* const C[], int ldc, int batchCount) { return hipblasDgeamBatchedFortran( handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc, batchCount); } template <> hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* beta, const hipblasComplex* const B[], int ldb, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCgeamBatchedFortran( handle, transA, transB, m, n, alpha, A, lda, beta, B, ldb, C, ldc, batchCount); } template <> hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* const B[], int ldb, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZgeamBatchedFortran( handle, transA, transB, m, n, alpha, 
A, lda, beta, B, ldb, C, ldc, batchCount); } // geam_strided_batched template <> hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* beta, const float* B, int ldb, hipblasStride strideB, float* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasSgeamStridedBatchedFortran(handle, transA, transB, m, n, alpha, A, lda, strideA, beta, B, ldb, strideB, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* beta, const double* B, int ldb, hipblasStride strideB, double* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasDgeamStridedBatchedFortran(handle, transA, transB, m, n, alpha, A, lda, strideA, beta, B, ldb, strideB, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* beta, const hipblasComplex* B, int ldb, hipblasStride strideB, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCgeamStridedBatchedFortran(handle, transA, transB, m, n, alpha, A, lda, strideA, beta, B, ldb, strideB, C, ldc, strideC, batchCount); } template <> hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZgeamStridedBatchedFortran(handle, transA, transB, m, n, alpha, A, lda, strideA, beta, B, ldb, strideB, C, ldc, strideC, batchCount); } #ifdef __HIP_PLATFORM_SOLVER__ // getrf template <> hipblasStatus_t hipblasGetrf( hipblasHandle_t handle, const int n, float* A, const int lda, int* ipiv, int* info) { return hipblasSgetrfFortran(handle, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGetrf( hipblasHandle_t handle, const int n, double* A, const int lda, int* ipiv, int* info) { return hipblasDgetrfFortran(handle, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGetrf( hipblasHandle_t handle, const int n, hipblasComplex* A, const int lda, int* ipiv, int* info) { return hipblasCgetrfFortran(handle, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGetrf(hipblasHandle_t handle, const int n, hipblasDoubleComplex* A, const int lda, int* ipiv, int* info) { return hipblasZgetrfFortran(handle, n, A, lda, ipiv, info); } // getrf_batched template <> hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, int* info, const int batchCount) { return hipblasSgetrfBatchedFortran(handle, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, int* info, const int batchCount) { return hipblasDgetrfBatchedFortran(handle, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t 
handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount) { return hipblasCgetrfBatchedFortran(handle, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount) { return hipblasZgetrfBatchedFortran(handle, n, A, lda, ipiv, info, batchCount); } // getrf_strided_batched template <> hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, float* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasSgetrfStridedBatchedFortran( handle, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, double* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasDgetrfStridedBatchedFortran( handle, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, hipblasComplex* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasCgetrfStridedBatchedFortran( handle, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasZgetrfStridedBatchedFortran( handle, n, A, lda, strideA, ipiv, strideP, info, batchCount); } // getrs template <> hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* A, const int lda, const int* ipiv, float* B, const int ldb, int* info) { return hipblasSgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info); } template <> hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* A, const int lda, const int* ipiv, double* B, const int ldb, int* info) { return hipblasDgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info); } template <> hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* A, const int lda, const int* ipiv, hipblasComplex* B, const int ldb, int* info) { return hipblasCgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info); } template <> hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const int* ipiv, hipblasDoubleComplex* B, const int ldb, int* info) { return hipblasZgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info); } // getrs_batched template <> hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount) { return hipblasSgetrsBatchedFortran( handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info, batchCount); } template <> hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], 
const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount) { return hipblasDgetrsBatchedFortran( handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info, batchCount); } template <> hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount) { return hipblasCgetrsBatchedFortran( handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info, batchCount); } template <> hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount) { return hipblasZgetrsBatchedFortran( handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info, batchCount); } // getrs_strided_batched template <> hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, float* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasSgetrsStridedBatchedFortran( handle, trans, n, nrhs, A, lda, strideA, ipiv, strideP, B, ldb, strideB, info, batchCount); } template <> hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, double* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasDgetrsStridedBatchedFortran( handle, trans, n, nrhs, A, lda, strideA, ipiv, strideP, B, ldb, strideB, info, batchCount); } template <> hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, hipblasComplex* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasCgetrsStridedBatchedFortran( handle, trans, n, nrhs, A, lda, strideA, ipiv, strideP, B, ldb, strideB, info, batchCount); } template <> hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, hipblasDoubleComplex* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasZgetrsStridedBatchedFortran( handle, trans, n, nrhs, A, lda, strideA, ipiv, strideP, B, ldb, strideB, info, batchCount); } // getri_batched template <> hipblasStatus_t hipblasGetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount) { return hipblasSgetriBatchedFortran(handle, n, A, lda, ipiv, C, ldc, info, batchCount); } template <> hipblasStatus_t hipblasGetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount) { return hipblasDgetriBatchedFortran(handle, n, A, lda, ipiv, C, ldc, info, batchCount); } template <> hipblasStatus_t hipblasGetriBatched(hipblasHandle_t 
handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount) { return hipblasCgetriBatchedFortran(handle, n, A, lda, ipiv, C, ldc, info, batchCount); } template <> hipblasStatus_t hipblasGetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount) { return hipblasZgetriBatchedFortran(handle, n, A, lda, ipiv, C, ldc, info, batchCount); } // geqrf template <> hipblasStatus_t hipblasGeqrf(hipblasHandle_t handle, const int m, const int n, float* A, const int lda, float* ipiv, int* info) { return hipblasSgeqrfFortran(handle, m, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGeqrf(hipblasHandle_t handle, const int m, const int n, double* A, const int lda, double* ipiv, int* info) { return hipblasDgeqrfFortran(handle, m, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGeqrf(hipblasHandle_t handle, const int m, const int n, hipblasComplex* A, const int lda, hipblasComplex* ipiv, int* info) { return hipblasCgeqrfFortran(handle, m, n, A, lda, ipiv, info); } template <> hipblasStatus_t hipblasGeqrf(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* A, const int lda, hipblasDoubleComplex* ipiv, int* info) { return hipblasZgeqrfFortran(handle, m, n, A, lda, ipiv, info); } // geqrf_batched template <> hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount) { return hipblasSgeqrfBatchedFortran(handle, m, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, double* const A[], const int lda, double* const ipiv[], int* info, const int batchCount) { return hipblasDgeqrfBatchedFortran(handle, m, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasComplex* const A[], const int lda, hipblasComplex* const ipiv[], int* info, const int batchCount) { return hipblasCgeqrfBatchedFortran(handle, m, n, A, lda, ipiv, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const ipiv[], int* info, const int batchCount) { return hipblasZgeqrfBatchedFortran(handle, m, n, A, lda, ipiv, info, batchCount); } // geqrf_strided_batched template <> hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, float* A, const int lda, const hipblasStride strideA, float* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasSgeqrfStridedBatchedFortran( handle, m, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, double* A, const int lda, const hipblasStride strideA, double* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasDgeqrfStridedBatchedFortran( handle, m, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, hipblasComplex* A, const int lda, const hipblasStride strideA, hipblasComplex* ipiv, const hipblasStride strideP, int* info, const int 
batchCount) { return hipblasCgeqrfStridedBatchedFortran( handle, m, n, A, lda, strideA, ipiv, strideP, info, batchCount); } template <> hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, hipblasDoubleComplex* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasZgeqrfStridedBatchedFortran( handle, m, n, A, lda, strideA, ipiv, strideP, info, batchCount); } // gels template <> hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* A, const int lda, float* B, const int ldb, int* info, int* deviceInfo) { return hipblasSgelsFortran(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo); } template <> hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* A, const int lda, double* B, const int ldb, int* info, int* deviceInfo) { return hipblasDgelsFortran(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo); } template <> hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* A, const int lda, hipblasComplex* B, const int ldb, int* info, int* deviceInfo) { return hipblasCgelsFortran(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo); } template <> hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, hipblasDoubleComplex* B, const int ldb, int* info, int* deviceInfo) { return hipblasZgelsFortran(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo); } // gelsBatched template <> hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* const A[], const int lda, float* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasSgelsBatchedFortran( handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* const A[], const int lda, double* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasDgelsBatchedFortran( handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* const A[], const int lda, hipblasComplex* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasCgelsBatchedFortran( handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasZgelsBatchedFortran( handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount); } // gelsStridedBatched template <> hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* A, const int lda, const hipblasStride strideA, float* B, const int ldb, 
const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasSgelsStridedBatchedFortran( handle, trans, m, n, nrhs, A, lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* A, const int lda, const hipblasStride strideA, double* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasDgelsStridedBatchedFortran( handle, trans, m, n, nrhs, A, lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* A, const int lda, const hipblasStride strideA, hipblasComplex* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasCgelsStridedBatchedFortran( handle, trans, m, n, nrhs, A, lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount); } template <> hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, hipblasDoubleComplex* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasZgelsStridedBatchedFortran( handle, trans, m, n, nrhs, A, lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount); } #endif hipBLAS-rocm-5.5.1/clients/common/near.cpp000066400000000000000000000343771434647641600203210ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * ************************************************************************ */ #include "near.h" #include "hipblas.h" #include "hipblas_vector.hpp" #include "utility.h" /* ========================================Gtest Unit Check * ==================================================== */ /*! 
\brief Template: gtest unit compare two matrices float/double/complex */ // Do not put a wrapper over ASSERT_FLOAT_EQ, sincer assert exit the current function NOT the test // case // a wrapper will cause the loop keep going #ifndef GOOGLE_TEST #define NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, err, NEAR_ASSERT) #define NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, err, NEAR_ASSERT) #else #define NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, err, NEAR_ASSERT) \ do \ { \ for(size_t k = 0; k < batch_count; k++) \ for(size_t j = 0; j < N; j++) \ for(size_t i = 0; i < M; i++) \ if(hipblas_isnan(hCPU[i + j * lda + k * strideA])) \ { \ ASSERT_TRUE(hipblas_isnan(hGPU[i + j * lda + k * strideA])); \ } \ else \ { \ NEAR_ASSERT(hCPU[i + j * lda + k * strideA], \ hGPU[i + j * lda + k * strideA], \ err); \ } \ } while(0) #define NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, err, NEAR_ASSERT) \ do \ { \ for(size_t k = 0; k < batch_count; k++) \ for(size_t j = 0; j < N; j++) \ for(size_t i = 0; i < M; i++) \ if(hipblas_isnan(hCPU[k][i + j * lda])) \ { \ ASSERT_TRUE(hipblas_isnan(hGPU[k][i + j * lda])); \ } \ else \ { \ NEAR_ASSERT(hCPU[k][i + j * lda], hGPU[k][i + j * lda], err); \ } \ } while(0) #endif #define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(half_to_float(a), half_to_float(b), err) #define NEAR_ASSERT_BF16(a, b, err) ASSERT_NEAR(bfloat16_to_float(a), bfloat16_to_float(b), err) #define NEAR_ASSERT_COMPLEX(a, b, err) \ do \ { \ auto ta = (a), tb = (b); \ ASSERT_NEAR(ta.real(), tb.real(), err); \ ASSERT_NEAR(ta.imag(), tb.imag(), err); \ } while(0) template <> void near_check_general(int M, int N, int lda, float* hCPU, float* hGPU, double abs_error) { NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, ASSERT_NEAR); } template <> void near_check_general(int M, int N, int lda, double* hCPU, double* hGPU, double abs_error) { NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, ASSERT_NEAR); } template <> void near_check_general( int M, int N, int lda, hipblasHalf* hCPU, hipblasHalf* hGPU, double abs_error) { NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF); } template <> void near_check_general( int M, int N, int lda, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU, double abs_error) { NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16); } template <> void near_check_general( int M, int N, int lda, hipblasComplex* hCPU, hipblasComplex* hGPU, double abs_error) { abs_error *= sqrthalf; NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX); } template <> void near_check_general( int M, int N, int lda, hipblasDoubleComplex* hCPU, hipblasDoubleComplex* hGPU, double abs_error) { abs_error *= sqrthalf; NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, float* hCPU, float* hGPU, double abs_error) { NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, ASSERT_NEAR); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, double* hCPU, double* hGPU, double abs_error) { NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, ASSERT_NEAR); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, hipblasHalf* hCPU, hipblasHalf* hGPU, double abs_error) { NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF); } template <> void near_check_general(int M, int N, int batch_count, 
int lda, hipblasStride strideA, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU, double abs_error) { NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, hipblasComplex* hCPU, hipblasComplex* hGPU, double abs_error) { abs_error *= sqrthalf; NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, hipblasDoubleComplex* hCPU, hipblasDoubleComplex* hGPU, double abs_error) { abs_error *= sqrthalf; NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX); } template <> void near_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[], double abs_error) { NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF); } template <> void near_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[], double abs_error) { NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16); } template <> void near_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[], double abs_error) { NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR); } template <> void near_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[], double abs_error) { NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR); } template <> void near_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[], double abs_error) { abs_error *= sqrthalf; NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX); } template <> void near_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[], double abs_error) { abs_error *= sqrthalf; NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasHalf* hCPU[], hipblasHalf* hGPU[], double abs_error) { NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasBfloat16* hCPU[], hipblasBfloat16* hGPU[], double abs_error) { NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16); } template <> void near_check_general( int M, int N, int batch_count, int lda, float* hCPU[], float* hGPU[], double abs_error) { NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR); } template <> void near_check_general( int M, int N, int batch_count, int lda, double* hCPU[], double* hGPU[], double abs_error) { NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasComplex* hCPU[], hipblasComplex* hGPU[], double abs_error) { abs_error *= sqrthalf; NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX); } template <> void near_check_general(int M, int N, int batch_count, int lda, hipblasDoubleComplex* hCPU[], hipblasDoubleComplex* hGPU[], double abs_error) { abs_error *= sqrthalf; NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX); } 
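/* ------------------------------------------------------------------------
 * Editorial usage sketch -- not part of the original near.cpp. It shows how
 * a test could call the near_check_general<float> specialization defined
 * above once a CPU reference result (hCPU) and a GPU result copied back to
 * the host (hGPU) are available. The helper name and the fixed tolerance
 * below are hypothetical choices made only for this illustration.
 * ------------------------------------------------------------------------ */
void example_near_check_usage(int M, int N, int lda, float* hCPU, float* hGPU)
{
    // Absolute elementwise tolerance; a real test would scale this with the
    // precision in use and with the length of the reduction being checked.
    double abs_error = 1.0e-5;

    // Compares element [i + j * lda] of both matrices; under GOOGLE_TEST this
    // asserts agreement to within abs_error, and NaN positions must match.
    near_check_general(M, N, lda, hCPU, hGPU, abs_error);
}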
hipBLAS-rocm-5.5.1/clients/common/norm.cpp000066400000000000000000000235451434647641600203420ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * ************************************************************************ */ #include "norm.h" #include "cblas.h" #include "hipblas.h" #include /* ===================================================================== README: Norm check: norm(A-B)/norm(A), evaluate relative error Numerically, it is recommended by lapack. Call lapack fortran routines that do not exsit in cblas library. No special header is required. But need to declare function prototype All the functions are fortran and should append underscore (_) while declaring prototype and calling. xlange and xaxpy prototype are like following =================================================================== */ #ifdef __cplusplus extern "C" { #endif float slange_(char* norm_type, int* m, int* n, float* A, int* lda, float* work); double dlange_(char* norm_type, int* m, int* n, double* A, int* lda, double* work); float clange_(char* norm_type, int* m, int* n, hipblasComplex* A, int* lda, float* work); double zlange_(char* norm_type, int* m, int* n, hipblasDoubleComplex* A, int* lda, double* work); float slansy_(char* norm_type, char* uplo, int* n, float* A, int* lda, float* work); double dlansy_(char* norm_type, char* uplo, int* n, double* A, int* lda, double* work); // float clanhe_(char* norm_type, char* uplo, int* n, hipblasComplex* A, int* lda, float* work); // double zlanhe_(char* norm_type, char* uplo, int* n, hipblasDoubleComplex* A, int* lda, double* // work); void saxpy_(int* n, float* alpha, float* x, int* incx, float* y, int* incy); void daxpy_(int* n, double* alpha, double* x, int* incx, double* y, int* incy); void caxpy_( int* n, hipblasComplex* alpha, hipblasComplex* x, int* incx, hipblasComplex* y, int* incy); void zaxpy_(int* n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int* incx, hipblasDoubleComplex* y, int* incy); #ifdef __cplusplus } #endif /* ============================Norm Check for General Matrix: float/double/complex template * speciliazation ======================================= */ /*! 
\brief compare the norm error of two matrices hCPU & hGPU */ template <> double norm_check_general(char norm_type, int M, int N, int lda, float* hCPU, float* hGPU) { // norm type can be M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly float work; int incx = 1; float alpha = -1.0f; int size = lda * N; float cpu_norm = slange_(&norm_type, &M, &N, hCPU, &lda, &work); saxpy_(&size, &alpha, hCPU, &incx, hGPU, &incx); float error = slange_(&norm_type, &M, &N, hGPU, &lda, &work) / cpu_norm; return (double)error; } template <> double norm_check_general(char norm_type, int M, int N, int lda, double* hCPU, double* hGPU) { // norm type can be M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly double work[1]; int incx = 1; double alpha = -1.0; int size = lda * N; double cpu_norm = dlange_(&norm_type, &M, &N, hCPU, &lda, work); daxpy_(&size, &alpha, hCPU, &incx, hGPU, &incx); double error = dlange_(&norm_type, &M, &N, hGPU, &lda, work) / cpu_norm; return error; } template <> double norm_check_general( char norm_type, int M, int N, int lda, hipblasComplex* hCPU, hipblasComplex* hGPU) { //norm type can be M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly float work[1]; int incx = 1; hipblasComplex alpha = -1.0f; int size = lda * N; float cpu_norm = clange_(&norm_type, &M, &N, hCPU, &lda, work); caxpy_(&size, &alpha, hCPU, &incx, hGPU, &incx); float error = clange_(&norm_type, &M, &N, hGPU, &lda, work) / cpu_norm; return (double)error; } template <> double norm_check_general( char norm_type, int M, int N, int lda, hipblasDoubleComplex* hCPU, hipblasDoubleComplex* hGPU) { //norm type can be M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly double work[1]; int incx = 1; hipblasDoubleComplex alpha = -1.0; int size = lda * N; double cpu_norm = zlange_(&norm_type, &M, &N, hCPU, &lda, work); zaxpy_(&size, &alpha, hCPU, &incx, hGPU, &incx); double error = zlange_(&norm_type, &M, &N, hGPU, &lda, work) / cpu_norm; return error; } template <> double norm_check_general( char norm_type, int M, int N, int lda, hipblasHalf* hCPU, hipblasHalf* hGPU) { // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly host_vector hCPU_double(N * lda); host_vector hGPU_double(N * lda); for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { hCPU_double[i + j * lda] = hCPU[i + j * lda]; hGPU_double[i + j * lda] = hGPU[i + j * lda]; } } return norm_check_general(norm_type, M, N, lda, hCPU_double, hGPU_double); } template <> double norm_check_general( char norm_type, int M, int N, int lda, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU) { // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly host_vector hCPU_double(N * lda); host_vector hGPU_double(N * lda); for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { hCPU_double[i + j * lda] = bfloat16_to_float(hCPU[i + j * lda]); hGPU_double[i + j * lda] = bfloat16_to_float(hGPU[i + j * lda]); } } return norm_check_general(norm_type, M, N, lda, hCPU_double, hGPU_double); } template <> double norm_check_general(char norm_type, int M, int N, int lda, int32_t* hCPU, int32_t* hGPU) { // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly host_vector hCPU_float(N * lda); host_vector hGPU_float(N * lda); for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { hCPU_float[i + j * lda] = (hCPU[i + j * lda]); hGPU_float[i + j * lda] = (hGPU[i + j * lda]); } } return norm_check_general(norm_type, M, N, lda, hCPU_float, hGPU_float); } /* ============================Norm Check for Symmetric Matrix: 
float/double/complex template * speciliazation ======================================= */ /*! \brief compare the norm error of two hermitian/symmetric matrices hCPU & hGPU */ template <> double norm_check_symmetric(char norm_type, char uplo, int N, int lda, float* hCPU, float* hGPU) { // norm type can be M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly float work[1]; int incx = 1; float alpha = -1.0f; int size = lda * N; float cpu_norm = slansy_(&norm_type, &uplo, &N, hCPU, &lda, work); saxpy_(&size, &alpha, hCPU, &incx, hGPU, &incx); float error = slansy_(&norm_type, &uplo, &N, hGPU, &lda, work) / cpu_norm; return (double)error; } template <> double norm_check_symmetric( char norm_type, char uplo, int N, int lda, double* hCPU, double* hGPU) { // norm type can be M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly double work[1]; int incx = 1; double alpha = -1.0; int size = lda * N; double cpu_norm = dlansy_(&norm_type, &uplo, &N, hCPU, &lda, work); daxpy_(&size, &alpha, hCPU, &incx, hGPU, &incx); double error = dlansy_(&norm_type, &uplo, &N, hGPU, &lda, work) / cpu_norm; return error; } // template<> // double norm_check_symmetric(char norm_type, char uplo, int N, int lda, hipblasComplex // *hCPU, hipblasComplex *hGPU) //{ ////norm type can be M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly // // float work[1]; // int incx = 1; // hipblasComplex alpha = -1.0f; // int size = lda * N; // // float cpu_norm = clanhe_(&norm_type, &uplo, &N, hCPU, &lda, work); // caxpy_(&size, &alpha, hCPU, &incx, hGPU, &incx); // // float error = clanhe_(&norm_type, &uplo, &N, hGPU, &lda, work)/cpu_norm; // // return (double)error; //} // // template<> // double norm_check_symmetric(char norm_type, char uplo, int N, int lda, // hipblasDoubleComplex *hCPU, hipblasDoubleComplex *hGPU) //{ ////norm type can be M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly // // double work[1]; // int incx = 1; // hipblasDoubleComplex alpha = -1.0; // int size = lda * N; // // double cpu_norm = zlanhe_(&norm_type, &uplo, &N, hCPU, &lda, work); // zaxpy_(&size, &alpha, hCPU, &incx, hGPU, &incx); // // double error = zlanhe_(&norm_type, &uplo, &N, hGPU, &lda, work)/cpu_norm; // // return error; //} hipBLAS-rocm-5.5.1/clients/common/unit.cpp000066400000000000000000000314701434647641600203420ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "unit.h" #include "hipblas.h" #include "hipblas_vector.hpp" #include "utility.h" /* ========================================Gtest Unit Check * ==================================================== */ /*! \brief Template: gtest unit compare two matrices float/double/complex */ // This returns from the current function if an error occurs #ifndef GOOGLE_TEST #define UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, UNIT_ASSERT_EQ) #define UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, UNIT_ASSERT_EQ) #else // GOOGLE_TEST #define UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, UNIT_ASSERT_EQ) \ do \ { \ for(size_t k = 0; k < batch_count; k++) \ for(size_t j = 0; j < N; j++) \ for(size_t i = 0; i < M; i++) \ if(hipblas_isnan(hCPU[i + j * lda + k * strideA])) \ { \ ASSERT_TRUE(hipblas_isnan(hGPU[i + j * lda + k * strideA])); \ } \ else \ { \ UNIT_ASSERT_EQ(hCPU[i + j * lda + k * strideA], \ hGPU[i + j * lda + k * strideA]); \ } \ } while(0) #define UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, UNIT_ASSERT_EQ) \ do \ { \ for(size_t k = 0; k < batch_count; k++) \ for(size_t j = 0; j < N; j++) \ for(size_t i = 0; i < M; i++) \ if(hipblas_isnan(hCPU[k][i + j * lda])) \ { \ ASSERT_TRUE(hipblas_isnan(hGPU[k][i + j * lda])); \ } \ else \ { \ UNIT_ASSERT_EQ(hCPU[k][i + j * lda], hGPU[k][i + j * lda]); \ } \ } while(0) #endif // GOOGLE_TEST #define ASSERT_HALF_EQ(a, b) ASSERT_FLOAT_EQ(half_to_float(a), half_to_float(b)) #define ASSERT_BFLOAT16_EQ(a, b) ASSERT_FLOAT_EQ(bfloat16_to_float(a), bfloat16_to_float(b)) #define ASSERT_FLOAT_COMPLEX_EQ(a, b) \ do \ { \ ASSERT_FLOAT_EQ(a.real(), b.real()); \ ASSERT_FLOAT_EQ(a.imag(), b.imag()); \ } while(0) #define ASSERT_DOUBLE_COMPLEX_EQ(a, b) \ do \ { \ ASSERT_DOUBLE_EQ(a.real(), b.real()); \ ASSERT_DOUBLE_EQ(a.imag(), b.imag()); \ } while(0) template <> void unit_check_general(int M, int N, int lda, hipblasHalf* hCPU, hipblasHalf* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_HALF_EQ); } template <> void unit_check_general(int M, int N, int lda, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_BFLOAT16_EQ); } template <> void unit_check_general(int M, int N, int lda, float* hCPU, float* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_FLOAT_EQ); } template <> void unit_check_general(int M, int N, int lda, double* hCPU, double* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_DOUBLE_EQ); } template <> void unit_check_general(int M, int N, int lda, hipblasComplex* hCPU, hipblasComplex* hGPU) { #ifdef GOOGLE_TEST for(int j = 0; j < N; j++) for(int i = 0; i < M; i++) { ASSERT_FLOAT_EQ(hCPU[i + j * lda].real(), hGPU[i + j * lda].real()); ASSERT_FLOAT_EQ(hCPU[i + j * lda].imag(), hGPU[i + j * lda].imag()); } #endif } template <> void unit_check_general( int M, int N, int lda, hipblasDoubleComplex* hCPU, hipblasDoubleComplex* hGPU) { #ifdef GOOGLE_TEST for(int j = 0; j < N; j++) for(int i = 0; i < M; i++) { ASSERT_DOUBLE_EQ(hCPU[i + j * lda].real(), hGPU[i + j * lda].real()); ASSERT_DOUBLE_EQ(hCPU[i + j * lda].imag(), hGPU[i + j * lda].imag()); } #endif } template <> void unit_check_general(int M, int N, int lda, int* hCPU, int* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_EQ); } // batched checks template <> void unit_check_general( int M, int N, int batch_count, int lda, hipblasHalf** hCPU, hipblasHalf** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_HALF_EQ); } 
template <> void unit_check_general( int M, int N, int batch_count, int lda, hipblasBfloat16** hCPU, hipblasBfloat16** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_BFLOAT16_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, float** hCPU, float** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_FLOAT_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, double** hCPU, double** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_DOUBLE_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, int** hCPU, int** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_EQ); } template <> void unit_check_general( int M, int N, int batch_count, int lda, hipblasComplex** hCPU, hipblasComplex** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_FLOAT_COMPLEX_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, hipblasDoubleComplex** hCPU, hipblasDoubleComplex** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_DOUBLE_COMPLEX_EQ); } // batched checks for host_vector[]s template <> void unit_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[]) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_HALF_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[]) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_BFLOAT16_EQ); } template <> void unit_check_general( int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[]) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_EQ); } template <> void unit_check_general( int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[]) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_FLOAT_EQ); } template <> void unit_check_general( int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[]) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_DOUBLE_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[]) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_FLOAT_COMPLEX_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[]) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_DOUBLE_COMPLEX_EQ); } // strided_batched checks template <> void unit_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, hipblasHalf* hCPU, hipblasHalf* hGPU) { UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_HALF_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU) { UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_BFLOAT16_EQ); } template <> void unit_check_general( int M, int N, int batch_count, int lda, hipblasStride strideA, float* hCPU, float* hGPU) { UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_FLOAT_EQ); } template <> void unit_check_general( int M, int N, int batch_count, int lda, hipblasStride strideA, double* hCPU, double* hGPU) { UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_DOUBLE_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, hipblasComplex* hCPU, hipblasComplex* hGPU) { UNIT_CHECK(M, N, batch_count, lda, 
strideA, hCPU, hGPU, ASSERT_FLOAT_COMPLEX_EQ); } template <> void unit_check_general(int M, int N, int batch_count, int lda, hipblasStride strideA, hipblasDoubleComplex* hCPU, hipblasDoubleComplex* hGPU) { UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_DOUBLE_COMPLEX_EQ); } template <> void unit_check_general( int M, int N, int batch_count, int lda, hipblasStride strideA, int* hCPU, int* hGPU) { UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_EQ); } hipBLAS-rocm-5.5.1/clients/common/utility.cpp000066400000000000000000000246621434647641600210730ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * ************************************************************************ */ #ifdef WIN32 #include // #include #endif #include "hipblas.h" #include "utility.h" #include #include #include #include #include #include #ifdef WIN32 #define strcasecmp(A, B) _stricmp(A, B) #ifdef __cpp_lib_filesystem #include namespace fs = std::filesystem; #else #include namespace fs = std::experimental::filesystem; #endif // Not WIN32 #else #include #include #endif hipblas_rng_t hipblas_rng(69069); hipblas_rng_t hipblas_seed(hipblas_rng); template <> char type2char() { return 's'; } template <> char type2char() { return 'd'; } // template<> // char type2char(){ // return 'c'; // } // template<> // char type2char(){ // return 'z'; // } template <> int type2int(float val) { return (int)val; } template <> int type2int(double val) { return (int)val; } template <> int type2int(hipblasComplex val) { return (int)val.real(); } template <> int type2int(hipblasDoubleComplex val) { return (int)val.real(); } /* ============================================================================================ */ // Return path of this executable std::string hipblas_exepath() { #ifdef WIN32 std::vector result(MAX_PATH + 1); // Ensure result is large enough to accomodate the path for(;;) { auto length = GetModuleFileNameA(nullptr, result.data(), result.size()); if(length < result.size() - 1) { result.resize(length + 1); // result.shrink_to_fit(); break; } result.resize(result.size() * 2); } fs::path exepath(result.begin(), result.end()); exepath = exepath.remove_filename(); // Add trailing "/" to exepath if required exepath += exepath.empty() ? 
"" : "/"; return exepath.string(); #else std::string pathstr; char* path = realpath("/proc/self/exe", 0); if(path) { char* p = strrchr(path, '/'); if(p) { p[1] = 0; pathstr = path; } free(path); } return pathstr; #endif } /* ============================================================================================ */ // Temp directory rooted random path std::string hipblas_tempname() { #ifdef WIN32 // Generate "/tmp/hipblas-XXXXXX" like file name const std::string alphanum = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv"; int stringlength = alphanum.length() - 1; std::string uniquestr = "hipblas-"; for(auto n : {0, 1, 2, 3, 4, 5}) uniquestr += alphanum.at(rand() % stringlength); fs::path tmpname = fs::temp_directory_path() / uniquestr; return tmpname.string(); #else char tmp[] = "/tmp/hipblas-XXXXXX"; int fd = mkostemp(tmp, O_CLOEXEC); if(fd == -1) { dprintf(STDERR_FILENO, "Cannot open temporary file: %m\n"); exit(EXIT_FAILURE); } return std::string(tmp); #endif } /***************** * local handles * *****************/ hipblasLocalHandle::hipblasLocalHandle() { auto status = hipblasCreate(&m_handle); if(status != HIPBLAS_STATUS_SUCCESS) throw std::runtime_error(hipblasStatusToString(status)); } hipblasLocalHandle::hipblasLocalHandle(const Arguments& arg) : hipblasLocalHandle() { hipblasAtomicsMode_t mode; auto status = hipblasGetAtomicsMode(m_handle, &mode); if(status != HIPBLAS_STATUS_SUCCESS) throw std::runtime_error(hipblasStatusToString(status)); if(mode != hipblasAtomicsMode_t(arg.atomics_mode)) status = hipblasSetAtomicsMode(m_handle, hipblasAtomicsMode_t(arg.atomics_mode)); if(status == HIPBLAS_STATUS_SUCCESS) { /* // If the test specifies user allocated workspace, allocate and use it if(arg.user_allocated_workspace) { if((hipMalloc)(&m_memory, arg.user_allocated_workspace) != hipSuccess) throw std::bad_alloc(); status = rocblas_set_workspace(m_handle, m_memory, arg.user_allocated_workspace); } */ } else { throw std::runtime_error(hipblasStatusToString(status)); } } hipblasLocalHandle::~hipblasLocalHandle() { if(m_memory) { CHECK_HIP_ERROR(hipFree(m_memory)); } hipblasStatus_t status = hipblasDestroy(m_handle); if(status != HIPBLAS_STATUS_SUCCESS) { printf("hipblasDestroy error!\n"); } } #ifdef __cplusplus extern "C" { #endif /* ============================================================================================ */ /* timing:*/ /*! \brief CPU Timer(in microsecond): synchronize with the default device and return wall time */ double get_time_us(void) { (void)hipDeviceSynchronize(); auto now = std::chrono::steady_clock::now(); // now.time_since_epoch() is the dureation since epogh // which is converted to microseconds auto duration = std::chrono::duration_cast(now.time_since_epoch()).count(); return (static_cast(duration)); }; /*! \brief CPU Timer(in microsecond): synchronize with given queue/stream and return wall time */ double get_time_us_sync(hipStream_t stream) { (void)hipStreamSynchronize(stream); auto now = std::chrono::steady_clock::now(); // now.time_since_epoch() is the dureation since epogh // which is converted to microseconds auto duration = std::chrono::duration_cast(now.time_since_epoch()).count(); return (static_cast(duration)); }; /* ============================================================================================ */ /* device query and print out their ID and name; return number of compute-capable devices. 
*/ int query_device_property() { int device_count; hipblasStatus_t status = (hipblasStatus_t)hipGetDeviceCount(&device_count); if(status != HIPBLAS_STATUS_SUCCESS) { printf("Query device error: cannot get device count \n"); return -1; } else { printf("Query device success: there are %d devices \n", device_count); } for(int i = 0; i < device_count; i++) { hipDeviceProp_t props; hipblasStatus_t status = (hipblasStatus_t)hipGetDeviceProperties(&props, i); if(status != HIPBLAS_STATUS_SUCCESS) { printf("Query device error: cannot get device ID %d's property\n", i); } else { printf("Device ID %d : %s ------------------------------------------------------\n", i, props.name); printf("with %3.1f GB memory, clock rate %dMHz @ computing capability %d.%d \n", props.totalGlobalMem / 1e9, (int)(props.clockRate / 1000), props.major, props.minor); printf( "maxGridDimX %d, sharedMemPerBlock %3.1f KB, maxThreadsPerBlock %d, warpSize %d\n", props.maxGridSize[0], props.sharedMemPerBlock / 1e3, props.maxThreadsPerBlock, props.warpSize); printf("-------------------------------------------------------------------------\n"); } } return device_count; } /* set current device to device_id */ void set_device(int device_id) { hipblasStatus_t status = (hipblasStatus_t)hipSetDevice(device_id); if(status != HIPBLAS_STATUS_SUCCESS) { printf("Set device error: cannot set device ID %d, there may not be such device ID\n", (int)device_id); } } /******************************************************************************* * GPU architecture-related functions ******************************************************************************/ int getArch() { int device; CHECK_HIP_ERROR(hipGetDevice(&device)); hipDeviceProp_t deviceProperties; CHECK_HIP_ERROR(hipGetDeviceProperties(&deviceProperties, device)); return deviceProperties.gcnArch; } int getArchMajor() { return getArch() / 100; } /******************************************************************************* * gemm_ex int8 layout ******************************************************************************/ bool layout_pack_int8(hipblasHandle_t handle) { // This function should match the rocBLAS function: rocblas_query_int8_layout_flag // // Default behavior is from when int8 was supported on gfx908 and other architectures // used packed_int8x4. All architectures support int8 since the following two PRs // for ROCm 4.2: // - Tensile PR 680 // - rocBLAS-internal PR 1328 hipblasInt8Datatype_t int8Type; hipblasGetInt8Datatype(handle, &int8Type); if(HIPBLAS_INT8_DATATYPE_DEFAULT == int8Type) { int arch = getArch(); return arch != 908; } else if(HIPBLAS_INT8_DATATYPE_INT8 == int8Type) { return false; } else if(HIPBLAS_INT8_DATATYPE_PACK_INT8x4 == int8Type) { return true; } return false; } #ifdef __cplusplus } #endif hipBLAS-rocm-5.5.1/clients/gtest/000077500000000000000000000000001434647641600165105ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/clients/gtest/CMakeLists.txt000066400000000000000000000153201434647641600212510ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # ######################################################################## set( THREADS_PREFER_PTHREAD_FLAG ON ) find_package( Threads REQUIRED ) # Linking lapack library requires fortran flags if(NOT WIN32) enable_language( Fortran ) endif() if( NOT TARGET hipblas ) find_package( hipblas REQUIRED CONFIG PATHS /opt/rocm/hipblas ) endif( ) find_package( GTest REQUIRED ) set(hipblas_test_source hipblas_gtest_main.cpp auxiliary_gtest.cpp set_get_pointer_mode_gtest.cpp set_get_vector_gtest.cpp set_get_matrix_gtest.cpp set_get_atomics_mode_gtest.cpp blas1_gtest.cpp axpy_ex_gtest.cpp dot_ex_gtest.cpp nrm2_ex_gtest.cpp rot_ex_gtest.cpp scal_ex_gtest.cpp gbmv_gtest.cpp gbmv_batched_gtest.cpp gbmv_strided_batched_gtest.cpp gemv_gtest.cpp gemv_batched_gtest.cpp gemv_strided_batched_gtest.cpp ger_gtest.cpp hbmv_gtest.cpp hemv_gtest.cpp hemv_batched_gtest.cpp hemv_strided_batched_gtest.cpp her_gtest.cpp her2_gtest.cpp hpmv_gtest.cpp hpr_gtest.cpp hpr2_gtest.cpp sbmv_gtest.cpp spmv_gtest.cpp spr_gtest.cpp spr2_gtest.cpp symv_gtest.cpp syr_gtest.cpp syr2_gtest.cpp tbmv_gtest.cpp tbsv_gtest.cpp tpmv_gtest.cpp tpsv_gtest.cpp trmv_gtest.cpp trsv_gtest.cpp dgmm_gtest.cpp gemm_gtest.cpp gemm_ex_gtest.cpp gemm_strided_batched_gtest.cpp gemm_batched_gtest.cpp hemm_gtest.cpp geam_gtest.cpp herk_gtest.cpp her2k_gtest.cpp herkx_gtest.cpp symm_gtest.cpp syrk_gtest.cpp syr2k_gtest.cpp syrkx_gtest.cpp trsm_gtest.cpp trsm_ex_gtest.cpp trmm_gtest.cpp trtri_gtest.cpp ) if( BUILD_WITH_SOLVER ) set( hipblas_solver_test_source getrf_gtest.cpp getrf_batched_gtest.cpp getrf_strided_batched_gtest.cpp getrs_gtest.cpp getrs_batched_gtest.cpp getrs_strided_batched_gtest.cpp getri_batched_gtest.cpp geqrf_gtest.cpp geqrf_batched_gtest.cpp geqrf_strided_batched_gtest.cpp gels_gtest.cpp gels_batched_gtest.cpp gels_strided_batched_gtest.cpp ) endif( ) if(LINK_BLIS) set( BLIS_CPP ../common/blis_interface.cpp ) endif() set( hipblas_test_common ../common/utility.cpp ../common/cblas_interface.cpp ../common/clients_common.cpp ../common/norm.cpp ../common/unit.cpp ../common/near.cpp ../common/arg_check.cpp ../common/argument_model.cpp ../common/hipblas_arguments.cpp ../common/hipblas_parse_data.cpp ../common/hipblas_datatype2string.cpp ../common/hipblas_template_specialization.cpp ${BLIS_CPP} ) add_executable( hipblas-test ${hipblas_f90_source} ${hipblas_test_source} ${hipblas_solver_test_source} ${hipblas_test_common} ) if(LINK_BLIS) set( BLIS_INCLUDE_DIR ${BUILD_DIR}/deps/blis/include/blis ) set( 
BLIS_LIBRARY ${BUILD_DIR}/deps/blis/lib/libblis.so ) endif() target_include_directories( hipblas-test PRIVATE $ ) target_compile_definitions( hipblas-test PRIVATE GOOGLE_TEST ) # External header includes included as SYSTEM files target_include_directories( hipblas-test SYSTEM PRIVATE $ $ $ $ ${ROCM_PATH}/include ) if (NOT WIN32) target_link_libraries( hipblas-test PRIVATE hipblas_fortran_client lapack cblas stdc++fs ) endif() target_link_libraries( hipblas-test PRIVATE ${BLAS_LIBRARY} roc::hipblas GTest::GTest Threads::Threads ) # need mf16c flag for float->half convertion target_compile_options( hipblas-test PRIVATE -mf16c ) # -Wno-deprecated-declarations ) target_compile_definitions( hipblas-test PRIVATE HIPBLAS_BFLOAT16_CLASS ROCM_USE_FLOAT16 ) if( NOT USE_CUDA ) target_link_libraries( hipblas-test PRIVATE hip::host ) if( CUSTOM_TARGET ) target_link_libraries( hipblas-test PRIVATE hip::${CUSTOM_TARGET} ) endif( ) if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) # hip-clang needs specific flag to turn on pthread and m target_link_libraries( hipblas-test PRIVATE -lpthread -lm ) if(BUILD_ADDRESS_SANITIZER) target_link_libraries( hipblas-test PRIVATE -fuse-ld=lld -lgfortran ) endif() endif() else( ) target_compile_definitions( hipblas-test PRIVATE __HIP_PLATFORM_NVCC__ ) target_include_directories( hipblas-test PRIVATE $ ) target_link_libraries( hipblas-test PRIVATE ${CUDA_LIBRARIES} ) endif( ) if (WIN32) # for now adding in all .dll as dependency chain is not cmake based on win32 file( GLOB third_party_dlls LIST_DIRECTORIES OFF CONFIGURE_DEPENDS ${LAPACK_DIR}/bin/*.dll ${BLIS_DIR}/lib/*.dll ${OPENBLAS_DIR}/bin/*.dll ${HIP_DIR}/bin/*.dll ${HIP_DIR}/bin/hipinfo.exe ${ROCBLAS_PATH}/bin/rocblas.dll ${ROCSOLVER_PATH}/bin/rocsolver.dll ${CMAKE_SOURCE_DIR}/rtest.* C:/Windows/System32/libomp140*.dll ) foreach( file_i ${third_party_dlls}) add_custom_command( TARGET hipblas-test POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${PROJECT_BINARY_DIR}/staging/ ) endforeach( file_i ) add_custom_command( TARGET hipblas-test POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory ${ROCBLAS_PATH}/bin/rocblas/library/ ${PROJECT_BINARY_DIR}/staging/library/) endif() set_target_properties( hipblas-test PROPERTIES CXX_STANDARD 17 CXX_STANDARED_REQUIRED ON CXX_EXTENSIONS OFF IMPORT_PREFIX "" IMPORT_SUFFIX ".lib" LINKER_LANGUAGE CXX DEBUG_POSTFIX "-d" RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) rocm_install(TARGETS hipblas-test COMPONENT tests) hipBLAS-rocm-5.5.1/clients/gtest/auxiliary_gtest.cpp000066400000000000000000000035651434647641600224420ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_exceptions.hpp" #include "utility.h" #include #include #include namespace { TEST(hipblas_auxiliary, statusToString) { EXPECT_EQ(0, strcmp("HIPBLAS_STATUS_ALLOC_FAILED", hipblasStatusToString(HIPBLAS_STATUS_ALLOC_FAILED))); } TEST(hipblas_auxiliary, badOperation) { EXPECT_EQ(testing_bad_operation(), HIPBLAS_STATUS_INVALID_ENUM); } TEST(hipblas_auxiliary, createHandle) { EXPECT_EQ(testing_handle(), HIPBLAS_STATUS_SUCCESS); } } // namespace hipBLAS-rocm-5.5.1/clients/gtest/axpy_ex_gtest.cpp000066400000000000000000000176461434647641600221150ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_axpy_batched_ex.hpp" #include "testing_axpy_ex.hpp" #include "testing_axpy_strided_batched_ex.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, int, vector, bool> axpy_ex_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* When you see this error, do not hack this source code, hack the Makefile. It is due to compilation. 
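   For reference, the axpy_ex_tuple fields (reconstructed from setup_axpy_ex_arguments
   below, so treat this as descriptive rather than authoritative) are:
       0: N   1: {alpha, alphai}   2: {incx, incy}   3: stride_scale
       4: batch_count   5: {a_type, b_type, c_type, compute_type}   6: fortran flag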
from 'testing::internal::CartesianProductHolder3, testing::internal::ParamGenerator >, testing::internal::ParamGenerator > >' to 'testing::internal::ParamGenerator >, std::vector > > >' */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ const int N_range[] = {-1, 10, 500, 1000, 7111}; // vector of vector, each pair is a {alpha, alphai}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_range = {{1.0, 2.0}}; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 2} // negative increments use absolute value for comparisons, so // some combinations may not work as expected. {-1, -1} as done // here is fine const vector> incx_incy_range = { {1, 1}, {2, 3}, {-1, -1}, }; const double stride_scale_range[] = {1.0, 2.5}; const int batch_count_range[] = {-1, 0, 1, 2, 10}; // Supported rocBLAS configs const vector> precisions{ // No cuBLAS support #ifndef __HIP_PLATFORM_NVCC__ {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F}, {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_32F}, #endif {HIPBLAS_R_32F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_32F}, {HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F}, {HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F}, {HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F}, {HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F}}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ class axpy_ex_gtest : public ::TestWithParam { protected: axpy_ex_gtest() {} virtual ~axpy_ex_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; Arguments setup_axpy_ex_arguments(axpy_ex_tuple tup) { Arguments arg; arg.N = std::get<0>(tup); arg.alpha = std::get<1>(tup)[0]; arg.alphai = std::get<1>(tup)[1]; arg.incx = std::get<2>(tup)[0]; arg.incy = std::get<2>(tup)[1]; arg.stride_scale = std::get<3>(tup); arg.batch_count = std::get<4>(tup); vector precision_types = std::get<5>(tup); arg.fortran = std::get<6>(tup); arg.a_type = precision_types[0]; arg.b_type = precision_types[1]; arg.c_type = precision_types[2]; arg.compute_type = precision_types[3]; arg.timing = 0; // disable timing data print out. 
Not supposed to collect performance data in gtest return arg; } // axpy TEST_P(axpy_ex_gtest, axpy_ex) { Arguments arg = setup_axpy_ex_arguments(GetParam()); hipblasStatus_t status = testing_axpy_ex(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || !arg.incx || !arg.incy) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.a_type == HIPBLAS_R_16F) { EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); // unsupported CUDA configs } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(axpy_ex_gtest, axpy_batched_ex) { Arguments arg = setup_axpy_ex_arguments(GetParam()); hipblasStatus_t status = testing_axpy_batched_ex(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || !arg.incx || !arg.incy || arg.batch_count <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(axpy_ex_gtest, axpy_strided_batched_ex) { Arguments arg = setup_axpy_ex_arguments(GetParam()); hipblasStatus_t status = testing_axpy_strided_batched_ex(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || !arg.incx || !arg.incy || arg.batch_count <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // Values is for a single item; ValuesIn is for an array // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p INSTANTIATE_TEST_SUITE_P(hipblasAxpyEx, axpy_ex_gtest, Combine(ValuesIn(N_range), ValuesIn(alpha_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(precisions), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/blas1_gtest.cpp000066400000000000000000002207351434647641600214350ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
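   Side note on the axpy_ex suite defined in the file above: its INSTANTIATE_TEST_SUITE_P
   enumerates the full Cartesian product of the ranges declared there, i.e.
   5 sizes x 1 alpha pair x 3 increment pairs x 2 stride scales x 5 batch counts
   x 7 precision configs (5 when the two configs guarded by #ifndef __HIP_PLATFORM_NVCC__
   are compiled out) x 2 Fortran flags = 2100 (respectively 1500) parameter sets
   per TEST_P body.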
* * ************************************************************************ */ #include "testing_asum.hpp" #include "testing_asum_batched.hpp" #include "testing_asum_strided_batched.hpp" #include "testing_axpy.hpp" #include "testing_axpy_batched.hpp" #include "testing_axpy_strided_batched.hpp" #include "testing_copy.hpp" #include "testing_copy_batched.hpp" #include "testing_copy_strided_batched.hpp" #include "testing_dot.hpp" #include "testing_dot_batched.hpp" #include "testing_dot_strided_batched.hpp" #include "testing_iamax_iamin.hpp" #include "testing_iamax_iamin_batched.hpp" #include "testing_iamax_iamin_strided_batched.hpp" #include "testing_nrm2.hpp" #include "testing_nrm2_batched.hpp" #include "testing_nrm2_strided_batched.hpp" #include "testing_rot.hpp" #include "testing_rot_batched.hpp" #include "testing_rot_strided_batched.hpp" #include "testing_rotg.hpp" #include "testing_rotg_batched.hpp" #include "testing_rotg_strided_batched.hpp" #include "testing_rotm.hpp" #include "testing_rotm_batched.hpp" #include "testing_rotm_strided_batched.hpp" #include "testing_rotmg.hpp" #include "testing_rotmg_batched.hpp" #include "testing_rotmg_strided_batched.hpp" #include "testing_scal.hpp" #include "testing_scal_batched.hpp" #include "testing_scal_strided_batched.hpp" #include "testing_swap.hpp" #include "testing_swap_batched.hpp" #include "testing_swap_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, int, bool> blas1_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* When you see this error, do not hack this source code, hack the Makefile. It is due to compilation. from 'testing::internal::CartesianProductHolder3, testing::internal::ParamGenerator >, testing::internal::ParamGenerator > >' to 'testing::internal::ParamGenerator >, std::vector > > >' */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ const int N_range[] = {-1, 10, 500, 1000, 7111, 10000}; // vector of vector, each pair is a {alpha, alphai, beta, betai}; // add/delete this list in pairs, like {2.0, 3.0, 4.0, 5.0} const vector> alpha_beta_range = {{1.0, 2.0, 0.0, 0.0}, {2.0, -1.0, -1.0, 2.0}}; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 2} // negative increments use absolute value for comparisons, so // some combinations may not work as expected. 
{-1, -1} as done // here is fine const vector> incx_incy_range = { {1, 1}, {1, 2}, {-1, -1}, }; const double stride_scale_range[] = {1.0, 2.5}; const int batch_count_range[] = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-1: scal, dot, nrm2, asum, amax, amin, axpy, copy, swap =================================================================== */ class blas1_gtest : public ::TestWithParam { protected: blas1_gtest() {} virtual ~blas1_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; Arguments setup_blas1_arguments(blas1_tuple tup) { int N = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); vector incx_incy = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; arg.N = N; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; arg.timing = 0; // disable timing data print out. Not supposed to collect performance data in gtest return arg; } // axpy #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas1_gtest, axpy_half) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_axpy(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(!arg.incx || !arg.incy) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif TEST_P(blas1_gtest, axpy_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_axpy(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(!arg.incx || !arg.incy) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, axpy_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_axpy(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(!arg.incx || !arg.incy) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // axpy_batched TEST_P(blas1_gtest, axpy_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_axpy_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(!arg.incx || !arg.incy) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, axpy_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_axpy_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(!arg.incx || !arg.incy) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // 
axpy_strided_batched TEST_P(blas1_gtest, axpy_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_axpy_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(!arg.incx || !arg.incy) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, axpy_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_axpy_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(!arg.incx || !arg.incy) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // copy tests TEST_P(blas1_gtest, copy_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_copy(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, copy_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_copy(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // copy_batched tests TEST_P(blas1_gtest, copy_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_copy_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, copy_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_copy_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // copy_strided_batched tests TEST_P(blas1_gtest, copy_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_copy_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, copy_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_copy_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { 
EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // scal tests TEST_P(blas1_gtest, scal_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, scal_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, scal_float_complex_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // scal_batched tests TEST_P(blas1_gtest, scal_batched_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { // for cublas EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); } } } TEST_P(blas1_gtest, scal_batched_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. 
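// The scal_float_complex_float case above exercises the mixed-precision scal
// (a complex vector scaled by a real alpha). A minimal direct call, assuming a
// valid handle and a device vector cx of n hipblasComplex elements (illustrative
// sketch only; the testing_scal wrapper adds allocation, a CPU reference and checks):
//     float alpha = 2.0f;
//     hipblasCsscal(handle, n, &alpha, cx, 1);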
// while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { // for cublas EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); } } } TEST_P(blas1_gtest, scal_batched_float_complex_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { // for cublas EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); } } } // scal_strided_batched tests TEST_P(blas1_gtest, scal_strided_batched_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { // for cublas EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); } } } TEST_P(blas1_gtest, scal_strided_batched_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { // for cublas EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); } } } TEST_P(blas1_gtest, scal_strided_batched_float_complex_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_scal_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { // for cublas EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); } } } #endif // swap tests TEST_P(blas1_gtest, swap_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_swap(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, swap_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_swap(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // swap_batched tests TEST_P(blas1_gtest, swap_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_swap_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, swap_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_swap_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } // swap_strided_batched tests TEST_P(blas1_gtest, swap_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_swap_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, swap_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_swap_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 
0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } #endif // dot tests #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas1_gtest, dot_half) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dot_bfloat16) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } #endif TEST_P(blas1_gtest, dot_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
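// The swap tests above verify the plain BLAS-1 exchange x <-> y. For orientation,
// a direct call looks like this (illustrative sketch, assuming device vectors
// dx and dy of length n):
//     hipblasSswap(handle, n, dx, 1, dy, 1);
// The batched/strided variants additionally take batch_count plus per-batch
// pointer arrays or strides.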
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dotu_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, dotc_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dotc(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // dot_batched tests TEST_P(blas1_gtest, dot_batched_half) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dot_batched_bfloat16) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dot_batched_float) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dotu_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dotc_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dotc_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } // dot_strided_batched tests TEST_P(blas1_gtest, dot_strided_batched_half) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dot_strided_batched_bfloat16) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dot_strided_batched_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dotu_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dot_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, dotc_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_dotc_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } #endif // nrm2 tests TEST_P(blas1_gtest, nrm2_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
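// Reminder for the dotu/dotc cases above: dotu forms sum(x_i * y_i), while dotc
// conjugates x first, i.e. sum(conj(x_i) * y_i). Illustrative direct calls,
// assuming device vectors dx and dy of n hipblasComplex elements:
//     hipblasComplex result;
//     hipblasCdotu(handle, n, dx, 1, dy, 1, &result);
//     hipblasCdotc(handle, n, dx, 1, dy, 1, &result);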
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_nrm2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, nrm2_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_nrm2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // nrm2_batched tests TEST_P(blas1_gtest, nrm2_batched_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_nrm2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(blas1_gtest, nrm2_batched_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_nrm2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // nrm2_strided_batched tests TEST_P(blas1_gtest, nrm2_strided_batched_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
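// For the nrm2 cases above: the result is the Euclidean norm sqrt(sum |x_i|^2),
// and for complex input the result is real. Illustrative direct calls, assuming
// device vectors dx (float) and cx (hipblasComplex) of length n:
//     float norm;
//     hipblasSnrm2(handle, n, dx, 1, &norm);
//     hipblasScnrm2(handle, n, cx, 1, &norm);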
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_nrm2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, nrm2_strided_batched_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_nrm2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // rot TEST_P(blas1_gtest, rot_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } TEST_P(blas1_gtest, rot_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } TEST_P(blas1_gtest, rot_float_complex_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } #ifndef __HIP_PLATFORM_NVCC__ // rot_batched TEST_P(blas1_gtest, rot_batched_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, rot_batched_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, rot_batched_float_complex_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // rot_strided_batched TEST_P(blas1_gtest, rot_strided_batched_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, rot_strided_batched_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, rot_strided_batched_float_complex_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
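    // [Added reference sketch] rot applies a Givens rotation to the vector pair:
    //     x[i] <-  c * x[i] + s * y[i]
    //     y[i] <- -s * x[i] + c * y[i]
    // e.g. with c = 0, s = 1 the vectors are swapped up to sign: x' = y, y' = -x.
    // Only batch_count is validated below because, per BLAS convention, n <= 0 is a
    // quick-return success rather than an error (an assumption mirrored by these tests).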
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rot_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // rotg TEST_P(blas1_gtest, rotg_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotg(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } TEST_P(blas1_gtest, rotg_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotg(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } #ifndef __HIP_PLATFORM_NVCC__ // rotg_batched TEST_P(blas1_gtest, rotg_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotg_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, rotg_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotg_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // rotg_strided_batched TEST_P(blas1_gtest, rotg_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotg_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, rotg_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotg_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // rotm TEST_P(blas1_gtest, rotm_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } #ifndef __HIP_PLATFORM_NVCC__ // rotm_batched TEST_P(blas1_gtest, rotm_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); 
// fail } } } // rotm_strided_batched TEST_P(blas1_gtest, rotm_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // rotmg TEST_P(blas1_gtest, rotmg_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotmg(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } #ifndef __HIP_PLATFORM_NVCC__ // rotmg_batched TEST_P(blas1_gtest, rotmg_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotmg_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // rotmg_strided_batched TEST_P(blas1_gtest, rotmg_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_rotmg_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // asum TEST_P(blas1_gtest, asum_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
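    // [Added standalone usage sketch, assuming the testing_* helpers are templated on
    // the data type, e.g. testing_asum<float>; kept commented out so the test body is
    // unchanged]
    //     Arguments direct;
    //     direct.N = 1000; direct.incx = 1; direct.timing = 0;
    //     hipblasStatus_t st = testing_asum<float>(direct);
    //     EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, st);
    // The parameterized tests below do the same thing via setup_blas1_arguments().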
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, asum_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, asum_double_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // asum_batched TEST_P(blas1_gtest, asum_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, asum_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, asum_batched_double_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // asum_strided_batched TEST_P(blas1_gtest, asum_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, 
status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, asum_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, asum_strided_batched_double_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_asum_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // amax TEST_P(blas1_gtest, amax_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amax(arg); EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } TEST_P(blas1_gtest, amax_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
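    // [Added worked example] amax returns the 1-based index of the element with the
    // largest absolute value (Fortran convention), so for x = {1, -7, 3} the result
    // is 2. For complex types the comparison typically uses |re| + |im|.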
Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amax(arg); EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } #ifndef __HIP_PLATFORM_NVCC__ // amax_batched TEST_P(blas1_gtest, amax_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amax_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, amax_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amax_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // amax_strided_batched TEST_P(blas1_gtest, amax_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amax_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, amax_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amax_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // amin TEST_P(blas1_gtest, amin_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amin(arg); EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } TEST_P(blas1_gtest, amin_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amin(arg); EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } #ifndef __HIP_PLATFORM_NVCC__ // amin_batched TEST_P(blas1_gtest, amin_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amin_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, amin_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amin_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // amin_strided_batched TEST_P(blas1_gtest, amin_strided_batched_float) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amin_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas1_gtest, amin_strided_batched_float_complex) { Arguments arg = setup_blas1_arguments(GetParam()); hipblasStatus_t status = testing_amin_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // Values is for a single item; ValuesIn is for an array // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them 
and feed them to test_p // The combinations are { N, {alpha, beta}, {incx, incy} } INSTANTIATE_TEST_SUITE_P(hipblasBlas1, blas1_gtest, Combine(ValuesIn(N_range), ValuesIn(alpha_beta_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/dgmm_gtest.cpp000066400000000000000000000253271434647641600213570ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_dgmm.hpp" #include "testing_dgmm_batched.hpp" #include "testing_dgmm_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, char, double, int, bool> dgmm_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda, incx, ldc}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1, -1, -1}, {128, 128, 150, 2, 150}, {1000, 1000, 1000, 2, 1000}, // TODO: rocBLAS dgmm is currently broken when (M != N && incx < 0 && side == L) // {128, 130, 150, -1, 150}, }; const vector side_range = { 'L', 'R', }; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 1, 5}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 dgmm: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 DGMM does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_dgmm_arguments(dgmm_tuple tup) { vector matrix_size = std::get<0>(tup); char side = std::get<1>(tup); double stride_scale = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.incx = matrix_size[3]; arg.ldc = matrix_size[4]; arg.side = side; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class dgmm_gtest : public ::TestWithParam { protected: dgmm_gtest() {} virtual ~dgmm_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // It appears that cublas and rocblas differ with their // dgmm results. Disable tests until they match. // TODO: re-enable tests when rocblas matches cublas. TEST_P(dgmm_gtest, dgmm_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_dgmm_arguments(GetParam()); hipblasStatus_t status = testing_dgmm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(dgmm_gtest, dgmm_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
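    // [Added worked example of what dgmm computes] C = diag(x) * A when side == 'L'
    // and C = A * diag(x) when side == 'R'. For side == 'R' with M = N = 2:
    //     A = [1 2; 3 4], x = {10, 100}  ->  C = [10 200; 30 400]
    // i.e. column j of A is scaled by x[j] (row i is scaled by x[i] for side == 'L').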
Arguments arg = setup_dgmm_arguments(GetParam()); hipblasStatus_t status = testing_dgmm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(dgmm_gtest, dgmm_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_dgmm_arguments(GetParam()); hipblasStatus_t status = testing_dgmm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.ldc < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(dgmm_gtest, dgmm_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_dgmm_arguments(GetParam()); hipblasStatus_t status = testing_dgmm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.ldc < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(dgmm_gtest, dgmm_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_dgmm_arguments(GetParam()); hipblasStatus_t status = testing_dgmm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.ldc < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(dgmm_gtest, dgmm_strided_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
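    // [Added note on the parameter space] With the ranges defined above, Combine()
    // enumerates the full Cartesian product:
    //     3 matrix sizes * 2 sides * 1 stride_scale * 3 batch_counts * 2 fortran flags
    //     = 36 parameter tuples,
    // and every TEST_P in this suite runs once per tuple.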
Arguments arg = setup_dgmm_arguments(GetParam()); hipblasStatus_t status = testing_dgmm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.ldc < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M}, {incx,incy} {alpha, alphai, beta, betai}, {transA}, {stride_scale}, {batch_count} } INSTANTIATE_TEST_SUITE_P(hipblasDgmm, dgmm_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(side_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/dot_ex_gtest.cpp000066400000000000000000000253531434647641600217140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_dot_batched_ex.hpp" #include "testing_dot_ex.hpp" #include "testing_dot_strided_batched_ex.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, double, int, vector, bool> dot_ex_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* When you see this error, do not hack this source code, hack the Makefile. It is due to compilation. 
from 'testing::internal::CartesianProductHolder3, testing::internal::ParamGenerator >, testing::internal::ParamGenerator > >' to 'testing::internal::ParamGenerator >, std::vector > > >'
*/

/* =====================================================================
Advanced users only: brainstorm the parameters, but do not make artificial ones which invalidate the matrix,
like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course.
Yet, the goal of this file is to verify result correctness not argument-checkers.
Representative sampling is sufficient, endless brute-force sampling is not necessary
=================================================================== */

const int N_range[] = {-1, 10, 500, 1000, 7111};

// vector of vector, each pair is a {incx, incy};
// add/delete this list in pairs, like {1, 2}
// negative increments use absolute value for comparisons, so
// some combinations may not work as expected. {-1, -1} as done
// here is fine
const vector<vector<int>> incx_incy_range = {
    {1, 1},
    {-1, -1},
};

const double stride_scale_range[] = {1.0, 2.5};

const int batch_count_range[] = {-1, 0, 1, 2, 10};

const vector<vector<hipblasDatatype_t>> precisions{
// Not supported in cuBLAS
#ifndef __HIP_PLATFORM_NVCC__
    {HIPBLAS_R_16B, HIPBLAS_R_16B, HIPBLAS_R_16B, HIPBLAS_R_32F},
    {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F},
#endif
    // Supported in both rocBLAS and cuBLAS
    {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_32F},
    {HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F},
    {HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F},
    {HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F},
    {HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F}};

const bool is_fortran[] = {false, true};

/* ===============Google Unit Test==================================================== */

class dot_ex_gtest : public ::TestWithParam<dot_ex_tuple>
{
protected:
    dot_ex_gtest() {}
    virtual ~dot_ex_gtest() {}
    virtual void SetUp() {}
    virtual void TearDown() {}
};

Arguments setup_dot_ex_arguments(dot_ex_tuple tup)
{
    Arguments arg;

    vector<int> incx_incy = std::get<1>(tup);
    arg.stride_scale      = std::get<2>(tup);
    arg.batch_count       = std::get<3>(tup);

    vector<hipblasDatatype_t> precision_types = std::get<4>(tup);

    arg.fortran = std::get<5>(tup);

    arg.incx = incx_incy[0];
    arg.incy = incx_incy[1];

    arg.a_type       = precision_types[0];
    arg.b_type       = precision_types[1];
    arg.c_type       = precision_types[2];
    arg.compute_type = precision_types[3];

    arg.timing = 0; // disable timing data print out. Not supposed to collect performance data in gtest

    return arg;
}

// dot tests
TEST_P(dot_ex_gtest, dot_ex)
{
    // GetParam returns a tuple. The setup routine unpacks the tuple
    // and initializes arg (Arguments), which will be passed to the testing routine.
    // The Arguments data structure has physical meaning associated with it,
    // while the tuple is non-intuitive.

    Arguments arg = setup_dot_ex_arguments(GetParam());

    hipblasStatus_t status = testing_dot_ex(arg);

    // if not success, then the input argument is problematic, so detect the error message
    if(status != HIPBLAS_STATUS_SUCCESS)
    {
        if(arg.N < 0 || arg.incx < 0 || arg.incy < 0)
        {
            EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status);
        }
        else
        {
            EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail
        }
    }
}

#ifndef __HIP_PLATFORM_NVCC__
TEST_P(dot_ex_gtest, dot_batched_ex)
{
    // GetParam returns a tuple. The setup routine unpacks the tuple
    // and initializes arg (Arguments), which will be passed to the testing routine.
    // The Arguments data structure has physical meaning associated with it,
    // while the tuple is non-intuitive.

    Arguments arg = setup_dot_ex_arguments(GetParam());

    hipblasStatus_t status = testing_dot_batched_ex(arg);

    // if not success, then the input argument is problematic, so detect the error message
    if(status != HIPBLAS_STATUS_SUCCESS)
    {
        if(arg.N < 0 || arg.incx < 0 || arg.incy < 0 || arg.batch_count < 0)
        {
            EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status);
        }
        else
        {
            EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail
        }
    }
}

TEST_P(dot_ex_gtest, dot_strided_batched_ex)
{
    // GetParam returns a tuple. The setup routine unpacks the tuple
    // and initializes arg (Arguments), which will be passed to the testing routine.
    // The Arguments data structure has physical meaning associated with it,
    // while the tuple is non-intuitive.

    Arguments arg = setup_dot_ex_arguments(GetParam());

    hipblasStatus_t status = testing_dot_strided_batched_ex(arg);

    // if not success, then the input argument is problematic, so detect the error message
    if(status != HIPBLAS_STATUS_SUCCESS)
    {
        if(arg.N < 0 || arg.incx < 0 || arg.incy < 0 || arg.batch_count < 0)
        {
            EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status);
        }
        else
        {
            EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail
        }
    }
}

// dotc tests
TEST_P(dot_ex_gtest, dotc_ex)
{
    // GetParam returns a tuple. The setup routine unpacks the tuple
    // and initializes arg (Arguments), which will be passed to the testing routine.
    // The Arguments data structure has physical meaning associated with it,
    // while the tuple is non-intuitive.

    Arguments arg = setup_dot_ex_arguments(GetParam());

    hipblasStatus_t status = testing_dotc_ex(arg);

    // if not success, then the input argument is problematic, so detect the error message
    if(status != HIPBLAS_STATUS_SUCCESS)
    {
        if(arg.N < 0 || arg.incx < 0 || arg.incy < 0)
        {
            EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status);
        }
        else
        {
            EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail
        }
    }
}

TEST_P(dot_ex_gtest, dotc_batched_ex)
{
    // GetParam returns a tuple. The setup routine unpacks the tuple
    // and initializes arg (Arguments), which will be passed to the testing routine.
    // The Arguments data structure has physical meaning associated with it,
    // while the tuple is non-intuitive.

    Arguments arg = setup_dot_ex_arguments(GetParam());

    hipblasStatus_t status = testing_dotc_batched_ex(arg);

    // if not success, then the input argument is problematic, so detect the error message
    if(status != HIPBLAS_STATUS_SUCCESS)
    {
        if(arg.N < 0 || arg.incx < 0 || arg.incy < 0 || arg.batch_count < 0)
        {
            EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status);
        }
        else
        {
            EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail
        }
    }
}

TEST_P(dot_ex_gtest, dotc_strided_batched_ex)
{
    // GetParam returns a tuple. The setup routine unpacks the tuple
    // and initializes arg (Arguments), which will be passed to the testing routine.
    // The Arguments data structure has physical meaning associated with it,
    // while the tuple is non-intuitive.
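    // [Added note] Each entry of the precisions table maps to
    //     {a_type, b_type, c_type, compute_type}
    // in setup_dot_ex_arguments(); e.g. {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F,
    // HIPBLAS_R_32F} exercises half-precision x, y and result with float accumulation.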
Arguments arg = setup_dot_ex_arguments(GetParam()); hipblasStatus_t status = testing_dotc_strided_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx < 0 || arg.incy < 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // Values is for a single item; ValuesIn is for an array // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p INSTANTIATE_TEST_SUITE_P(hipblasDotEx, dot_ex_gtest, Combine(ValuesIn(N_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(precisions), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/gbmv_batched_gtest.cpp000066400000000000000000000167331434647641600230410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_gbmv_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, char, int, bool> gbmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, KL, KU, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1, -1, -1}, {1000, 1000, 150, 84, 1000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {-1, -1}, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {2.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgbmv/dgbmv, const vector transA_range = { 'N', 'T', // 'C', }; // number of gbmv in batched gbmv const vector batch_count_range = {-1, 0, 1, 5}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 gbmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GBMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gbmv_arguments(gbmv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); char transA = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.KL = matrix_size[2]; arg.KU = matrix_size[3]; arg.lda = matrix_size[4]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.batch_count = batch_count; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class gbmv_batched_gtest : public ::TestWithParam { protected: gbmv_batched_gtest() {} virtual ~gbmv_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(gbmv_batched_gtest, gbmv_gtest_float) { Arguments arg = setup_gbmv_arguments(GetParam()); hipblasStatus_t status = testing_gbmv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.KL < 0 || arg.KU < 0 || arg.lda < arg.KU + arg.KL + 1 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gbmv_batched_gtest, gbmv_gtest_float_complex) { Arguments arg = setup_gbmv_arguments(GetParam()); hipblasStatus_t status = testing_gbmv_batched(arg); // if not success, then the input argument is problematic, so detect the error message 
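    // [Added worked example of the banded-storage constraint] gbmv stores the KL
    // sub-diagonals and KU super-diagonals of A in a (KL + KU + 1) x N array, which is
    // why the checks below require lda >= KL + KU + 1. For the {1000, 1000, 150, 84,
    // 1000} case above: 150 + 84 + 1 = 235, so lda = 1000 is valid.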
if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.KL < 0 || arg.KU < 0 || arg.lda < arg.KU + arg.KL + 1 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha, beta}, {transA}, {batch_count} } INSTANTIATE_TEST_SUITE_P(hipblasGbmvBatched, gbmv_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/gbmv_gtest.cpp000066400000000000000000000174541434647641600213700ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_gbmv.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, char, bool> gbmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, KL, KU, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1, -1, -1}, // {10, 10, 2}, // {600,500, 500}, {1000, 1000, 150, 84, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {0, -1}, {-1, -1}, // {10, 100}, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {1.0, 0.0}, {-1.0, -1.0}, {2.0, 1.0}, {0.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgbmv/dgbmv, const vector transA_range = { 'N', 'T', 'C', }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 gbmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GBMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gbmv_arguments(gbmv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); char transA = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.KL = matrix_size[2]; arg.KU = matrix_size[3]; arg.lda = matrix_size[4]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class gbmv_gtest : public ::TestWithParam { protected: gbmv_gtest() {} virtual ~gbmv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(gbmv_gtest, gbmv_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gbmv_arguments(GetParam()); hipblasStatus_t status = testing_gbmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.KL < 0 || arg.KU < 0 || arg.lda < arg.KU + arg.KL + 1 || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gbmv_gtest, gbmv_gtest_float_complex) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gbmv_arguments(GetParam()); hipblasStatus_t status = testing_gbmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.KL < 0 || arg.KU < 0 || arg.lda < arg.KU + arg.KL + 1 || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha, beta}, {transA} } INSTANTIATE_TEST_SUITE_P(hipblasGbmv, gbmv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/gbmv_strided_batched_gtest.cpp000066400000000000000000000177411434647641600245570ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_gbmv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, vector, char, int, bool> gbmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". 
case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, KL, KU, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1, -1, -1}, {1000, 1000, 150, 84, 1000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {-1, -1}, }; // a vector of single double values. This value will be multiplied by // appropriate dimensions to get the stride between vectors and matrices const vector stride_scale_range = { 1, 2, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {2.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgbmv/dgbmv, const vector transA_range = { 'N', 'T', // 'C', }; // number of gemms in batched gemm const vector batch_count_range = {-1, 0, 1, 5}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 gbmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GBMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gbmv_arguments(gbmv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); double stride_scale = std::get<2>(tup); vector alpha_beta = std::get<3>(tup); char transA = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.KL = matrix_size[2]; arg.KU = matrix_size[3]; arg.lda = matrix_size[4]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; // see the comments about stride_scale above arg.stride_scale = stride_scale; arg.batch_count = batch_count; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class gbmv_strided_batched_gtest : public ::TestWithParam { protected: gbmv_strided_batched_gtest() {} virtual ~gbmv_strided_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(gbmv_strided_batched_gtest, gbmv_gtest_float) { Arguments arg = setup_gbmv_arguments(GetParam()); hipblasStatus_t status = testing_gbmv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.KL < 0 || arg.KU < 0 || arg.lda < arg.KL + arg.KU + 1 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gbmv_strided_batched_gtest, gbmv_gtest_float_complex) { Arguments arg = setup_gbmv_arguments(GetParam()); hipblasStatus_t status = testing_gbmv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.KL < 0 || arg.KU < 0 || arg.lda < arg.KL + arg.KU + 1 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {stride_scale}, {alpha, beta}, {transA}, {batch_count} } INSTANTIATE_TEST_SUITE_P(hipblasGbmvStridedBatched, gbmv_strided_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/geam_gtest.cpp000066400000000000000000000275661434647641600213530ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_geam.hpp" #include "testing_geam_batched.hpp" #include "testing_geam_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, double, int, bool> geam_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ // vector of vector, each vector is a {M, N, lda, ldb, ldc}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1, 1, 1}, {5, 5, 5, 5, 5}, {3, 33, 33, 34, 35}, {10, 10, 100, 10, 10}, {600, 500, 500, 600, 500}, // {1024, 1024, 1024, 1024, 1024} }; // vector of vector, each pair is a {alpha, alphai, beta, betai}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {2.0, -3.0, 0.0, 0.0}, {3.0, 1.0, 1.0, -1.0}, {0.0, 0.0, 2.0, -5.0}, {0.0, 0.0, 0.0, 0.0}, }; // vector of vector, each pair is a {transA, transB}; // add/delete this list in pairs, like {'N', 'T'} // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgeam/dgeam, // TODO: Conjugate was broken up to rocBLAS 3.5. Add conjugate tests when fixed. const vector> transA_transB_range = {{'N', 'N'}, {'N', 'T'}}; //, {'C', 'N'}, {'T', 'C'}}; const vector stride_scale_range = {1, 3}; const vector batch_count_range = {1, 3, 5}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 GEAM: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". 
// like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_geam_arguments(geam_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); vector transA_transB = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.ldc = matrix_size[4]; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.transA = transA_transB[0]; arg.transB = transA_transB[1]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; arg.timing = 0; return arg; } class geam_gtest : public ::TestWithParam { protected: geam_gtest() {} virtual ~geam_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(geam_gtest, geam_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_geam_arguments(GetParam()); hipblasStatus_t status = testing_geam(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || (arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) || (arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) || arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geam_gtest, geam_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_geam_arguments(GetParam()); hipblasStatus_t status = testing_geam(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || (arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) || (arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) || arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(geam_gtest, geam_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
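    // Aside (illustrative, not part of the original test): Combine + ValuesIn in the
    // INSTANTIATE_TEST_SUITE_P at the bottom of this file form the Cartesian product of the
    // ranges above, so with the uncommented entries this body runs for roughly
    // 5 (sizes) x 4 (alpha/beta) x 2 (trans) x 2 (stride_scale) x 3 (batch_count) x 2 (fortran)
    // = 480 parameter combinations.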
Arguments arg = setup_geam_arguments(GetParam()); hipblasStatus_t status = testing_geam_batched(arg); if(status == HIPBLAS_STATUS_NOT_SUPPORTED) return; // for cuda // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || (arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) || (arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) || arg.ldc < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geam_gtest, geam_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_geam_arguments(GetParam()); hipblasStatus_t status = testing_geam_batched(arg); if(status == HIPBLAS_STATUS_NOT_SUPPORTED) return; // for cuda // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || (arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) || (arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) || arg.ldc < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geam_gtest, geam_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_geam_arguments(GetParam()); hipblasStatus_t status = testing_geam_strided_batched(arg); if(status == HIPBLAS_STATUS_NOT_SUPPORTED) return; // for cuda // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || (arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) || (arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) || arg.ldc < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geam_gtest, geam_strided_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_geam_arguments(GetParam()); hipblasStatus_t status = testing_geam_strided_batched(arg); if(status == HIPBLAS_STATUS_NOT_SUPPORTED) return; // for cuda // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || (arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) || (arg.transB == 'N' ? 
                                               arg.ldb < arg.K : arg.ldb < arg.N)
           || arg.ldc < arg.M || arg.batch_count < 0)
        {
            EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status);
        }
        else
        {
            EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail
        }
    }
}
#endif

// This instantiation mainly tests the scope of alpha_beta and transA_transB; the scope of
// matrix_size_range is small
INSTANTIATE_TEST_SUITE_P(hipblasGeam_scalar_transpose,
                         geam_gtest,
                         Combine(ValuesIn(matrix_size_range),
                                 ValuesIn(alpha_beta_range),
                                 ValuesIn(transA_transB_range),
                                 ValuesIn(stride_scale_range),
                                 ValuesIn(batch_count_range),
                                 ValuesIn(is_fortran)));
hipBLAS-rocm-5.5.1/clients/gtest/gels_batched_gtest.cpp000066400000000000000000000162331434647641600230330ustar00rootroot00000000000000/* ************************************************************************
 * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
* * ************************************************************************ */ #include "testing_gels_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, char, int, bool> gels_batched_tuple; typedef std::tuple gels_batched_bad_arg_tuple; // {m, n, nrhs, lda, ldb} const vector> matrix_size_range = {{-1, -1, -1, 1, 1}, {10, 10, 10, 10, 10}, {10, 10, 10, 20, 100}, {600, 500, 400, 600, 600}}; const vector trans_range = { 'N', // 'T', // commenting this out for now as cuBLAS only supports non-transpose }; const vector batch_count_range = {-1, 0, 1, 2}; const vector is_fortran = {false, true}; Arguments setup_gels_batched_arguments(gels_batched_tuple tup) { vector matrix_size = std::get<0>(tup); char trans = std::get<1>(tup); int batchCount = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.K = matrix_size[2]; // nrhs arg.lda = matrix_size[3]; arg.ldb = matrix_size[4]; arg.transA = trans; arg.batch_count = batchCount; arg.fortran = fortran; return arg; } class gels_batched_gtest_bad_arg : public ::TestWithParam { protected: gels_batched_gtest_bad_arg() {} virtual ~gels_batched_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class gels_batched_gtest : public ::TestWithParam { protected: gels_batched_gtest() {} virtual ~gels_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // Not doing bad_arg testing with cuBLAS backend for now // Error codes given by cuBLAS seem inaccurate. #ifndef __HIP_PLATFORM_NVCC__ TEST_P(gels_batched_gtest_bad_arg, gels_batched_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_gels_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } #endif TEST_P(gels_batched_gtest, gels_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_batched_arguments(GetParam()); hipblasStatus_t status = testing_gels_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_batched_gtest, gels_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_batched_arguments(GetParam()); hipblasStatus_t status = testing_gels_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_batched_gtest, gels_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
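    // Note: for gels, arg.K carries nrhs (see setup_gels_batched_arguments above), and the
    // two ldb comparisons below together express the requirement ldb >= max(M, N).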
Arguments arg = setup_gels_batched_arguments(GetParam()); hipblasStatus_t status = testing_gels_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_batched_gtest, gels_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_batched_arguments(GetParam()); hipblasStatus_t status = testing_gels_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, nrhs, lda, ldb}, trans, batchCount, fortran } INSTANTIATE_TEST_SUITE_P(hipblasGelsBatched, gels_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(trans_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #ifndef __HIP_PLATFORM_NVCC__ INSTANTIATE_TEST_SUITE_P(hipblasGelsBatchedBadArg, gels_batched_gtest_bad_arg, Combine(ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/gels_gtest.cpp000066400000000000000000000144431434647641600213620ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_gels.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, char, bool> gels_tuple; typedef std::tuple gels_bad_arg_tuple; // {m, n, nrhs, lda, ldb} const vector> matrix_size_range = {{-1, -1, -1, 1, 1}, {10, 10, 10, 10, 10}, {10, 10, 10, 20, 100}, {600, 500, 400, 600, 600}}; const vector trans_range = { 'N', 'T', }; const vector is_fortran = {false, true}; Arguments setup_gels_arguments(gels_tuple tup) { vector matrix_size = std::get<0>(tup); char trans = std::get<1>(tup); bool fortran = std::get<2>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.K = matrix_size[2]; // nrhs arg.lda = matrix_size[3]; arg.ldb = matrix_size[4]; arg.transA = trans; arg.fortran = fortran; return arg; } class gels_gtest_bad_arg : public ::TestWithParam { protected: gels_gtest_bad_arg() {} virtual ~gels_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class gels_gtest : public ::TestWithParam { protected: gels_gtest() {} virtual ~gels_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(gels_gtest_bad_arg, gels_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_gels_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } TEST_P(gels_gtest, gels_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_arguments(GetParam()); hipblasStatus_t status = testing_gels(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_gtest, gels_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_arguments(GetParam()); hipblasStatus_t status = testing_gels(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_gtest, gels_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_arguments(GetParam()); hipblasStatus_t status = testing_gels(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_gtest, gels_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
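    // The same validation logic is repeated for the double, float_complex and double_complex
    // variants below; only the element type handed to the templated tester changes.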
Arguments arg = setup_gels_arguments(GetParam()); hipblasStatus_t status = testing_gels(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, nrhs, lda, ldb}, trans, fortran } INSTANTIATE_TEST_SUITE_P(hipblasGels, gels_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(trans_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(hipblasGelsBadArg, gels_gtest_bad_arg, Combine(ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/gels_strided_batched_gtest.cpp000066400000000000000000000167471434647641600245630ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_gels_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, char, double, int, bool> gels_strided_batched_tuple; typedef std::tuple gels_strided_batched_bad_arg_tuple; // {m, n, nrhs, lda, ldb} const vector> matrix_size_range = {{-1, -1, -1, 1, 1}, {10, 10, 10, 10, 10}, {10, 10, 10, 20, 100}, {600, 500, 400, 600, 600}}; const vector trans_range = { 'N', 'T', }; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 1, 2}; const vector is_fortran = {false, true}; Arguments setup_gels_strided_batched_arguments(gels_strided_batched_tuple tup) { vector matrix_size = std::get<0>(tup); char trans = std::get<1>(tup); double strideScale = std::get<2>(tup); int batchCount = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.K = matrix_size[2]; // nrhs arg.lda = matrix_size[3]; arg.ldb = matrix_size[4]; arg.transA = trans; arg.stride_scale = strideScale; arg.batch_count = batchCount; arg.fortran = fortran; return arg; } class gels_strided_batched_gtest_bad_arg : public ::TestWithParam { protected: gels_strided_batched_gtest_bad_arg() {} virtual ~gels_strided_batched_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class gels_strided_batched_gtest : public ::TestWithParam { protected: gels_strided_batched_gtest() {} virtual ~gels_strided_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(gels_strided_batched_gtest_bad_arg, gels_strided_batched_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_gels_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_gels_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } TEST_P(gels_strided_batched_gtest, gels_strided_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_gels_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_strided_batched_gtest, gels_strided_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_gels_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_strided_batched_gtest, gels_strided_batched_gtest_float_complex) { // GetParam returns a tuple. 
The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_gels_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gels_strided_batched_gtest, gels_strided_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_gels_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_gels_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.lda < arg.M || arg.ldb < arg.M || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, nrhs, lda, ldb}, trans, batchCount, fortran } INSTANTIATE_TEST_SUITE_P(hipblasGelsStridedBatched, gels_strided_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(trans_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(hipblasGelsStridedBatchedBadArg, gels_strided_batched_gtest_bad_arg, Combine(ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/gemm_batched_gtest.cpp000066400000000000000000000274071434647641600230330ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_gemm_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; /* TEST(hipblas_blas3, gemm_batched_float_bad_arg) { testing_gemm_batched_device_array(); } */ // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, int, bool> gemm_batched_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, K, lda, ldb, ldc}; // add/delete as a group, in batched gemm, the matrix is much smaller than standard gemm const vector> matrix_size_range = { // {-1, -1, -1, -1, 1, 1}, {10, 10, 10, 10, 10, 10}, {32, 32, 32, 100, 100, 100}, {64, 64, 64, 128, 128, 128}, {128, 128, 128, 128, 128, 128}, // {500, 500, 500, 500, 600, 500}, }; // vector of vector, each pair is a {alpha, alphai, beta, betai}; // add/delete this list in pairs, like {2.0, 3.0, 4.0, 5.0} const vector> alpha_beta_range = { {1.0, 2.0, 0.0, 0.0}, {-1.0, 2.0, -1.0, 1.0}, }; // vector of vector, each pair is a {transA, transB}; // add/delete this list in pairs, like {'N', 'T'} // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgemm_batched/dgemm_batched, const vector> transA_transB_range = {{'N', 'N'}, {'N', 'T'}, {'C', 'N'}, {'T', 'C'}}; // number of gemms in batched gemm const vector batch_count_range = { -1, 0, 1, 2, 10, // 100, }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 gemm_batched: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
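// Aside (worked example): the checks in the TEST_P bodies below require lda >= M when
// transA == 'N' and lda >= K otherwise, ldb >= K when transB == 'N' and ldb >= N otherwise,
// and ldc >= M. For instance, the entry {64, 64, 64, 128, 128, 128} in matrix_size_range
// satisfies all of them for every transA/transB combination, while the commented-out entry
// {-1, -1, -1, -1, 1, 1} would be rejected with HIPBLAS_STATUS_INVALID_VALUE.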
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gemm_batched_arguments(gemm_batched_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); vector transA_transB = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.K = matrix_size[2]; arg.lda = matrix_size[3]; arg.ldb = matrix_size[4]; arg.ldc = matrix_size[5]; // the first 2 elements of alpha_beta_range are always alpha, and the second 2 are always beta arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.transA = transA_transB[0]; arg.transB = transA_transB[1]; arg.batch_count = batch_count; arg.timing = 0; arg.fortran = fortran; return arg; } class gemm_batched_gtest : public ::TestWithParam { protected: gemm_batched_gtest() {} virtual ~gemm_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(gemm_batched_gtest, float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_batched_arguments(GetParam()); hipblasStatus_t status = testing_gemm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_batched_gtest, double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_batched_arguments(GetParam()); hipblasStatus_t status = testing_gemm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_batched_gtest, hipblasComplex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. 
// while the tuple is non-intuitive. Arguments arg = setup_gemm_batched_arguments(GetParam()); hipblasStatus_t status = testing_gemm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_batched_gtest, hipblasDoubleComplex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_batched_arguments(GetParam()); hipblasStatus_t status = testing_gemm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, K, lda, ldb, ldc}, {alpha, beta}, {transA, transB}, {batch_count} // } INSTANTIATE_TEST_SUITE_P(hipblasGemmBatched, gemm_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/gemm_ex_gtest.cpp000066400000000000000000000744531434647641600220600ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_gemm_batched_ex.hpp" #include "testing_gemm_ex.hpp" #include "testing_gemm_strided_batched_ex.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, vector, int, bool> gemm_ex_tuple; // clang-format off // vector of vector, each vector is a {M, N, K, lda, ldb, ldc}; // add/delete as a group const vector> int8_matrix_size_range = { { 4, 4, 4, 4, 4, 4}, { 8, 8, 8, 8, 8, 8}, {12, 12, 12, 12, 12, 12}, {16, 16, 16, 16, 16, 16}, {20, 20, 20, 20, 20, 20}, { 8, 4, 4, 8, 8, 8}, { 8, 12, 12, 12, 12, 12}, }; const vector> small_matrix_size_range = { { 1, 1, 1, 1, 1, 1}, { 1, 2, 3, 4, 5, 6}, { 7, 9, 15, 17, 18, 19}, { 8, 1, 1, 8, 8, 8}, { 2, 2, 2, 2, 2, 2}, { 3, 3, 3, 3, 3, 3}, { 4, 4, 4, 4, 4, 4}, { 5, 5, 5, 5, 5, 5}, { 6, 6, 6, 6, 6, 6}, { 7, 7, 7, 7, 7, 7}, { 8, 8, 8, 8, 8, 8}, { 9, 9, 9, 9, 9, 9}, {10, 10, 10, 10, 10, 10}, {11, 11, 11, 11, 11, 11}, {12, 12, 12, 12, 12, 12}, {13, 13, 13, 13, 13, 13}, {14, 14, 14, 14, 14, 14}, {15, 15, 15, 15, 15, 15}, {16, 16, 16, 16, 16, 16}, {17, 17, 17, 17, 17, 17}, {18, 18, 18, 18, 18, 18}, {19, 19, 19, 19, 19, 19}, {20, 20, 20, 20, 20, 20}, { 2, 3, 4, 5, 6, 7}, { 3, 4, 5, 6, 7, 8}, { 4, 5, 6, 6, 6, 6}, { 5, 6, 7, 7, 8, 9}, { 6, 7, 8, 10, 9, 8}, { 7, 8, 9, 11, 9, 10}, { 8, 9, 10, 10, 11, 12}, { 9, 10, 11, 12, 11, 13}, {13, 12, 11, 15, 14, 13}, {15, 16, 17, 17, 18, 19}, {18, 17, 16, 18, 18, 18}, {16, 17, 18, 20, 19, 18}, { 8, 2, 2, 8, 8, 8}, { 8, 3, 3, 8, 8, 8}, { 8, 4, 4, 8, 8, 8}, { 8, 5, 5, 8, 8, 8}, { 8, 6, 6, 8, 8, 8}, { 8, 7, 7, 8, 8, 8}, { 8, 9, 9, 9, 9, 9}, { 8, 10, 10, 10, 10, 10}, { 8, 11, 11, 11, 11, 11}, { 8, 12, 12, 12, 12, 12}, { 8, 13, 13, 13, 13, 13}, { 8, 14, 14, 14, 14, 14}, { 8, 15, 15, 15, 15, 15}, // {16, 15, 15, 16, 16, 16}, // {16, 17, 17, 17, 17, 17}, // {17, 16, 16, 17, 17, 17}, // {16, 18, 18, 18, 18, 18}, // {24, 24, 24, 24, 24, 24}, // {32, 32, 32, 32, 32, 32}, // {40, 40, 40, 40, 40, 40}, // {48, 48, 48, 48, 48, 48}, // {56, 56, 56, 56, 56, 56}, // {64, 64, 64, 64, 64, 64}, // {72, 72, 72, 72, 72, 72}, }; const vector> medium_matrix_size_range = { {127, 127, 63, 127, 127, 127}, {128, 127, 63, 128, 128, 128}, {129, 127, 63, 129, 129, 129}, // {127, 128, 63, 128, 127, 127}, // {128, 128, 63, 128, 127, 127}, // {129, 128, 63, 129, 129, 129}, // {127, 129, 63, 129, 129, 129}, // {128, 129, 63, 129, 129, 129}, // {129, 129, 63, 129, 129, 129}, // {127, 127, 64, 127, 127, 127}, // {128, 127, 64, 128, 128, 128}, // {129, 127, 64, 129, 129, 129}, // {127, 128, 64, 128, 127, 127}, // {128, 128, 64, 128, 127, 127}, // {129, 128, 64, 129, 129, 129}, // {127, 129, 64, 129, 
129, 129}, // {128, 129, 64, 129, 129, 129}, // {129, 129, 64, 129, 129, 129}, // {127, 127, 65, 127, 127, 127}, // {128, 127, 65, 128, 128, 128}, // {129, 127, 65, 129, 129, 129}, // {127, 128, 65, 128, 127, 127}, // {128, 128, 65, 128, 127, 127}, // {129, 128, 65, 129, 129, 129}, // {127, 129, 65, 129, 129, 129}, // {128, 129, 65, 129, 129, 129}, // {129, 129, 65, 129, 129, 129}, // {191, 193, 194, 195, 196, 197}, // {500, 501, 502, 503, 604, 505}, // {639, 640, 347, 960, 961,1062}, }; // vector of vector, each vector is a {M, N, K, lda, ldb, ldc}; const vector> large_matrix_size_range = { {1000, 1001, 101, 2002, 1003, 1004}, { 925, 1026, 1027, 1028, 2029, 1031}, {4011, 4012, 103, 4014, 4015, 4016}, }; // vector of vector, each vector is a {M, N, K, lda, ldb, ldc}; const vector> chunk_matrix_size_range = { {24000, 256, 256, 24010, 256, 24000}, {24000, 256, 256, 24000, 256, 24020}, { 256, 24001, 256, 256, 24030, 24000}, { 256, 24001, 256, 256, 24000, 24040}, }; // vector of vector, each vector is a {M, N, K, lda, ldb, ldc}; const vector> NaN_matrix_size_range = { { 5, 6, 7, 8, 9, 10}, {4011, 4012, 111, 4013, 4014, 4015}, }; // vector of vector, each pair is a {alpha, alphai, beta, betai}; // add/delete this list in pairs, like {2.0, 3.0, 4.0, 5.0} const vector> alpha_beta_range = { {5.0, 2.0, 0.0, 0.0}, {0.0, 0.0, 3.0, 0.0}, {1.0, -2.0, -3.0, 4.0}, }; // For Cuda v < 10.0, only alpha and beta = 1 or = 0 are // supported. const vector> alpha_beta_range_int8 = { {1.0, 0.0, 1.0, 0.0}, {1.0, 0.0, 0.0, 0.0}, }; // vector of vector, each pair is a {transA, transB}; // add/delete this list in pairs, like {'N', 'T'} // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgemm/dgemm, const vector> small_transA_transB_range = {{'N', 'N'}}; const vector> transA_transB_range = {{'N', 'N'}, {'N', 'T'}, {'C', 'N'}, {'T', 'C'}}; // a_type, b_type, c_type, d_type, compute_type const vector> precision_half = {{ HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F }}; const vector> precision_hpa_half = {{ HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_32F }}; const vector> precision_single = {{ HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F }}; const vector> precision_double = {{ HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F }}; const vector> precision_single_complex = {{ HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F }}; const vector> precision_double_complex = {{ HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F }}; const vector> precision_int8 = {{ HIPBLAS_R_8I, HIPBLAS_R_8I, HIPBLAS_R_32I, HIPBLAS_R_32I, HIPBLAS_R_32I }}; const vector> precision_type_range = {{HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F}, {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_32F}, {HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F}, {HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F}, {HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F}, {HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F}, {HIPBLAS_R_8I, HIPBLAS_R_8I, HIPBLAS_R_32I, HIPBLAS_R_32I, HIPBLAS_R_32I}}; const int batch_count_range[] = { -1, 1, 5 }; const int batch_count_range_small[] = { 1 }; const bool is_fortran[] = {false, true}; const bool is_fortran_false[] = {false}; // clang-format on /* ===============Google Unit 
Test==================================================== */ /* ===================================================================== BLAS-3 GEMM: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gemm_ex_arguments(gemm_ex_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); vector transA_transB = std::get<2>(tup); vector precision_types = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.K = matrix_size[2]; arg.lda = matrix_size[3]; arg.ldb = matrix_size[4]; arg.ldc = matrix_size[5]; // the first 2 elements of alpha_beta_range are always alpha, and the second 2 are always beta arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.transA = transA_transB[0]; arg.transB = transA_transB[1]; arg.timing = 0; arg.a_type = precision_types[0]; arg.b_type = precision_types[1]; arg.c_type = precision_types[2]; arg.compute_type = precision_types[4]; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class gemm_ex_gtest : public ::TestWithParam { protected: gemm_ex_gtest() {} virtual ~gemm_ex_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(gemm_ex_gtest, standard) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_ex_arguments(GetParam()); hipblasStatus_t status = testing_gemm_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { // Only available in cuda cc >= 5.0. // If we want we can change this to call query_device_property() and // call this only if cc < 5.0 on a CUDA device, else fail. EXPECT_EQ(HIPBLAS_STATUS_ARCH_MISMATCH, status); } } } class gemm_batch_ex_gtest : public ::TestWithParam { protected: gemm_batch_ex_gtest() {} virtual ~gemm_batch_ex_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(gemm_batch_ex_gtest, gemm_batched_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
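    // Note: each entry of the precision_* vectors above is ordered
    // {a_type, b_type, c_type, d_type, compute_type}; setup_gemm_ex_arguments copies
    // indices 0, 1, 2 and 4 into arg (the a/b/c and compute types).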
Arguments arg = setup_gemm_ex_arguments(GetParam()); hipblasStatus_t status = testing_gemm_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(status == HIPBLAS_STATUS_ARCH_MISMATCH) { // Only available in cuda cc >= 5.0. // If we want we can change this to call query_device_property() and // call this only if cc < 5.0 on a CUDA device, else fail. EXPECT_EQ(HIPBLAS_STATUS_ARCH_MISMATCH, status); } else { #ifndef __HIP_PLATFORM_NVCC__ // on HIP we should pass all tests EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); #else // cublas/rocblas do not have identical support // (i.e. cublas doesn't support i8/i32 here) EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); #endif } } } TEST_P(gemm_batch_ex_gtest, gemm_strided_batched_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_ex_arguments(GetParam()); hipblasStatus_t status = testing_gemm_strided_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(status == HIPBLAS_STATUS_ARCH_MISMATCH) { // Only available in cuda cc >= 5.0. // If we want we can change this to call query_device_property() and // call this only if cc < 5.0 on a CUDA device, else fail. EXPECT_EQ(HIPBLAS_STATUS_ARCH_MISMATCH, status); } else { #ifndef __HIP_PLATFORM_NVCC__ // on HIP we should pass all tests EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); #else // cublas/rocblas do not have identical support // (i.e. cublas doesn't support i8/i32 here) EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); #endif } } } // TODO: Disabling some gemm int8 tests as not supported by rocBLAS for all architectures // class parameterized_chunk_gemm_ex : public ::TestWithParam // { // protected: // parameterized_chunk_gemm_ex() {} // virtual ~parameterized_chunk_gemm_ex() {} // virtual void SetUp() {} // virtual void TearDown() {} // }; // TEST_P(parameterized_chunk_gemm_ex, float) // { // // GetParam return a tuple. Tee setup routine unpack the tuple // // and initializes arg(Arguments) which will be passed to testing routine // // The Arguments data struture have physical meaning associated. // // while the tuple is non-intuitive. // Arguments arg = setup_gemm_ex_arguments(GetParam()); // hipblasStatus_t status = testing_gemm_ex(arg); // // if not success, then the input argument is problematic, so detect the error message // if(status != HIPBLAS_STATUS_SUCCESS) // { // if(arg.M < 0 || arg.N < 0 || arg.K < 0) // { // EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); // } // else if(arg.transA == 'N' ? 
arg.lda < arg.M : arg.lda < arg.K) // { // EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); // } // else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) // { // EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); // } // else // { // // Only available in cuda cc >= 5.0. // // If we want we can change this to call query_device_property() and // // call this only if cc < 5.0 on a CUDA device, else fail. // EXPECT_EQ(HIPBLAS_STATUS_ARCH_MISMATCH, status); // } // } // } // class parameterized_half_gemm_ex : public ::TestWithParam // { // protected: // parameterized_half_gemm_ex() {} // virtual ~parameterized_half_gemm_ex() {} // virtual void SetUp() {} // virtual void TearDown() {} // }; INSTANTIATE_TEST_SUITE_P(quick_blas_ex_small_int8, gemm_ex_gtest, Combine(ValuesIn(int8_matrix_size_range), ValuesIn(alpha_beta_range_int8), ValuesIn(transA_transB_range), ValuesIn(precision_int8), ValuesIn(batch_count_range_small), ValuesIn(is_fortran))); // TEST(pre_checkin_blas_ex_bad_arg, float) { testing_gemm_ex_bad_arg(); } //----small INSTANTIATE_TEST_SUITE_P(quick_blas_ex_small_hpa_half, gemm_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_hpa_half), ValuesIn(batch_count_range_small), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_ex_small_half, gemm_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_half), ValuesIn(batch_count_range_small), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_ex_small_single, gemm_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_single), ValuesIn(batch_count_range_small), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_ex_small_double, gemm_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_double), ValuesIn(batch_count_range_small), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_ex_small_single_complex, gemm_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_single_complex), ValuesIn(batch_count_range_small), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_ex_small_double_complex, gemm_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_double_complex), ValuesIn(batch_count_range_small), ValuesIn(is_fortran))); //----medium INSTANTIATE_TEST_SUITE_P(pre_checkin_blas_ex_medium_hpa_half, gemm_ex_gtest, Combine(ValuesIn(medium_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_hpa_half), ValuesIn(batch_count_range_small), ValuesIn(is_fortran_false))); INSTANTIATE_TEST_SUITE_P(pre_checkin_blas_ex_medium_half, gemm_ex_gtest, Combine(ValuesIn(medium_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_half), ValuesIn(batch_count_range_small), ValuesIn(is_fortran_false))); INSTANTIATE_TEST_SUITE_P(pre_checkin_blas_ex_medium_float, gemm_ex_gtest, Combine(ValuesIn(medium_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_single), ValuesIn(batch_count_range_small), ValuesIn(is_fortran_false))); INSTANTIATE_TEST_SUITE_P(pre_checkin_blas_ex_medium_double, gemm_ex_gtest, 
Combine(ValuesIn(medium_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_double), ValuesIn(batch_count_range_small), ValuesIn(is_fortran_false))); //----small-batched INSTANTIATE_TEST_SUITE_P(quick_blas_batched_ex_small_hpa_half, gemm_batch_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_hpa_half), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_batched_ex_small_half, gemm_batch_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_half), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_batched_ex_small_single, gemm_batch_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_single), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_batched_ex_small_double, gemm_batch_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_double), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_batched_ex_small_single_complex, gemm_batch_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_single_complex), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_batched_ex_small_double_complex, gemm_batch_ex_gtest, Combine(ValuesIn(small_matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(precision_double_complex), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(quick_blas_batched_ex_small_int8, gemm_batch_ex_gtest, Combine(ValuesIn(int8_matrix_size_range), ValuesIn(alpha_beta_range_int8), ValuesIn(transA_transB_range), ValuesIn(precision_int8), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/gemm_gtest.cpp000066400000000000000000000247051434647641600213570ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_gemm.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, bool> gemm_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ // vector of vector, each vector is a {M, N, K, lda, ldb, ldc}; // add/delete as a group const vector> matrix_size_range = { // {-1, -1, -1, -1, 1, 1}, {3, 33, 3, 33, 35, 35} // { 5, 5, 5, 5, 5, 5}, // {10, 10, 20, 100, 10, 10}, // {600,500, 500, 500, 600, 500}, // {1024, 1024, 1024, 1024, 1024, 1024} }; // vector of vector, each pair is a {alpha, alphai, beta, betai}; // add/delete this list in pairs, like {2.0, 3.0, 4.0, 5.0} const vector> alpha_beta_range = {{2.0, 2.0, 0.0, 0.0}, {0.0, 0.0, 1.0, 2.0}}; // vector of vector, each pair is a {transA, transB}; // add/delete this list in pairs, like {'N', 'T'} // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgemm/dgemm, const vector> transA_transB_range = {{'N', 'N'}, {'N', 'T'}, {'C', 'N'}, {'T', 'C'}}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 GEMM: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gemm_arguments(gemm_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); vector transA_transB = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.K = matrix_size[2]; arg.lda = matrix_size[3]; arg.ldb = matrix_size[4]; arg.ldc = matrix_size[5]; // the first 2 elements of alpha_beta_range are always alpha, and the second 2 are always beta arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.transA = transA_transB[0]; arg.transB = transA_transB[1]; arg.fortran = fortran; arg.timing = 0; return arg; } class gemm_gtest : public ::TestWithParam { protected: gemm_gtest() {} virtual ~gemm_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(gemm_gtest, gemm_gtest_float) { // GetParam return a tuple. 
The setup routine unpacks the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data structure has physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_arguments(GetParam()); hipblasStatus_t status = testing_gemm<float>(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_gtest, gemm_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data structure has physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_arguments(GetParam()); hipblasStatus_t status = testing_gemm<double>(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_gtest, gemm_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data structure has physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_arguments(GetParam()); hipblasStatus_t status = testing_gemm<hipblasComplex>(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_gtest, gemm_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data structure has physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_arguments(GetParam()); hipblasStatus_t status = testing_gemm<hipblasDoubleComplex>(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ?
arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, K, lda, ldb, ldc}, {alpha, beta}, {transA, transB} } // THis function mainly test the scope of alpha_beta, transA_transB,.the scope of matrix_size_range // is small INSTANTIATE_TEST_SUITE_P(hipblasGemm_scalar_transpose, gemm_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/gemm_strided_batched_gtest.cpp000066400000000000000000000275061434647641600245510ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_gemm_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, int, bool> gemm_strided_batched_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, K, lda, ldb, ldc}; // add/delete as a group, in batched gemm, the matrix is much smaller than standard gemm const vector> matrix_size_range = { // {-1, -1, -1, -1, 1, 1}, {32, 32, 32, 100, 100, 100}, {64, 64, 64, 128, 128, 128}, {128, 128, 128, 128, 128, 128}, // {500, 500, 500, 500, 600, 500}, }; // vector of vector, each pair is a {alpha, alphai, beta, betai}; // add/delete this list in pairs, like {2.0, 3.0, 4.0, 5.0} const vector> alpha_beta_range = { {1.0, 2.0, 0.0, 0.0}, {-1.0, 1.0, -1.0, 2.0}, }; // vector of vector, each pair is a {transA, transB}; // add/delete this list in pairs, like {'N', 'T'} // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgemm_strided_batched/dgemm_strided_batched, const vector> transA_transB_range = {{'N', 'N'}, {'N', 'T'}, {'C', 'N'}, {'T', 'C'}}; // number of gemms in batched gemm const vector batch_count_range = { -1, 0, 1, 2, 10, // 100, }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 gemm_strided_batched: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gemm_strided_batched_arguments(gemm_strided_batched_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); vector transA_transB = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.K = matrix_size[2]; arg.lda = matrix_size[3]; arg.ldb = matrix_size[4]; arg.ldc = matrix_size[5]; // the first 2 elements of alpha_beta_range are always alpha, and the second 2 are always beta arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.transA = transA_transB[0]; arg.transB = transA_transB[1]; arg.batch_count = batch_count; arg.timing = 0; arg.fortran = fortran; return arg; } class gemm_strided_batched_gtest : public ::TestWithParam { protected: gemm_strided_batched_gtest() {} virtual ~gemm_strided_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(gemm_strided_batched_gtest, float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
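// [Clarifying note, not in the original source.] The status checks below follow the
// standard column-major leading-dimension rules:
//     transA == 'N'  ->  A is stored as M x K, so lda must be >= M
//     transA != 'N'  ->  A is stored as K x M, so lda must be >= K
// e.g. for a hypothetical {M, N, K} = {32, 32, 64} with transA = 'T', any lda < 64
// would be expected to be rejected with HIPBLAS_STATUS_INVALID_VALUE.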
Arguments arg = setup_gemm_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_gemm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_strided_batched_gtest, double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_gemm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_strided_batched_gtest, hipblasComplex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemm_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_gemm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemm_strided_batched_gtest, hipblasDoubleComplex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_gemm_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_gemm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.K < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transA == 'N' ? arg.lda < arg.M : arg.lda < arg.K) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.transB == 'N' ? arg.ldb < arg.K : arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldc < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, K, lda, ldb, ldc}, {alpha, beta}, {transA, transB}, {batch_count} // } INSTANTIATE_TEST_SUITE_P(hipblasGemmStridedBatched, gemm_strided_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(transA_transB_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/gemv_batched_gtest.cpp000066400000000000000000000176711434647641600230460ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_gemv_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, char, int, bool> gemv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, {1000, 1000, 1000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {-1, -1}, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {2.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgemv/dgemv, const vector transA_range = { 'N', 'T', // 'C', }; // number of gemms in batched gemm const vector batch_count_range = { -1, 0, 2, }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 gemv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
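// [Illustrative example; these concrete values are drawn from the ranges above and are
//  not an additional test case.] A fully populated gemv_tuple, as consumed by
//  setup_gemv_arguments() below, would look like:
//      gemv_tuple tup{{1000, 1000, 1000}, // {M, N, lda}
//                     {2, 1},             // {incx, incy}
//                     {2.0, 1.0},         // {alpha, beta}
//                     'N',                // transA
//                     2,                  // batch_count
//                     false};             // fortran
//  Members of Arguments that gemv never uses (e.g. ldb, ldc) are simply left at their
//  defaults, as described above.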
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gemv_arguments(gemv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); char transA = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.batch_count = batch_count; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class gemv_batched_gtest : public ::TestWithParam { protected: gemv_batched_gtest() {} virtual ~gemv_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(gemv_batched_gtest, gemv_gtest_float) { Arguments arg = setup_gemv_arguments(GetParam()); hipblasStatus_t status = testing_gemv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } } } TEST_P(gemv_batched_gtest, gemv_gtest_float_complex) { Arguments arg = setup_gemv_arguments(GetParam()); hipblasStatus_t status = testing_gemv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha, beta}, {transA}, {batch_count} } INSTANTIATE_TEST_SUITE_P(hipblasGemvBatched, gemv_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/gemv_gtest.cpp000066400000000000000000000202511434647641600213600ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_gemv.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, char, bool> gemv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, // {10, 10, 2}, // {600,500, 500}, {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {0, -1}, {-1, -1}, // {10, 100}, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {1.0, 0.0}, {-1.0, -1.0}, {2.0, 1.0}, {0.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgemv/dgemv, const vector transA_range = { 'N', 'T', 'C', }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 gemv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gemv_arguments(gemv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); char transA = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class gemv_gtest : public ::TestWithParam { protected: gemv_gtest() {} virtual ~gemv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(gemv_gtest, gemv_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
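// [Clarifying note, not in the original source.] The entries {0, -1} and {-1, -1} in
// incx_incy_range above exist specifically to drive the incx <= 0 and incy <= 0
// branches of the status checks below.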
Arguments arg = setup_gemv_arguments(GetParam()); hipblasStatus_t status = testing_gemv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(gemv_gtest, gemv_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_gemv_arguments(GetParam()); hipblasStatus_t status = testing_gemv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha, beta}, {transA} } INSTANTIATE_TEST_SUITE_P(hipblasGemv, gemv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/gemv_strided_batched_gtest.cpp000066400000000000000000000207101434647641600245500ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_gemv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, vector, char, int, bool> gemv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, {1000, 1000, 1000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {-1, -1}, }; // a vector of single double values. This value will be multiplied by // appropriate dimensions to get the stride between vectors and matrices const vector stride_scale_range = { 1, 1.5, 2, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {2.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // sgemv/dgemv, const vector transA_range = { 'N', 'T', // 'C', }; // number of gemms in batched gemm const vector batch_count_range = { -1, 0, 2, }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 gemv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
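// [Illustrative sketch; the exact stride computation lives inside
//  testing_gemv_strided_batched() and the formulas below are an assumption.]
// stride_scale acts as a multiplier on the minimal per-batch footprint, roughly:
//     stride_A ~ size_t(lda   * N      * stride_scale);
//     stride_x ~ size_t(x_len * |incx| * stride_scale);  // x_len is N for 'N', M otherwise
//     stride_y ~ size_t(y_len * |incy| * stride_scale);  // y_len is M for 'N', N otherwise
// so stride_scale = 1 packs consecutive batch members back to back, while 1.5 or 2
// leave padding between them.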
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_gemv_arguments(gemv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); double stride_scale = std::get<2>(tup); vector alpha_beta = std::get<3>(tup); char transA = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; // see the comments about stride_scale above arg.stride_scale = stride_scale; arg.batch_count = batch_count; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class gemv_strided_batched_gtest : public ::TestWithParam { protected: gemv_strided_batched_gtest() {} virtual ~gemv_strided_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(gemv_strided_batched_gtest, gemv_gtest_float) { Arguments arg = setup_gemv_arguments(GetParam()); hipblasStatus_t status = testing_gemv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(gemv_strided_batched_gtest, gemv_gtest_float_complex) { Arguments arg = setup_gemv_arguments(GetParam()); hipblasStatus_t status = testing_gemv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incx <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {stride_scale}, {alpha, beta}, {transA}, {batch_count} } INSTANTIATE_TEST_SUITE_P(hipblasGemvStridedBatched, gemv_strided_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/geqrf_batched_gtest.cpp000066400000000000000000000150401434647641600232000ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. 
All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_geqrf_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> geqrf_batched_tuple; typedef std::tuple geqrf_batched_bad_arg_tuple; const vector> matrix_size_range = {{10, 10, 10}, {10, 10, 20}, {600, 500, 600}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {2}; const vector is_fortran = {false, true}; Arguments setup_geqrf_batched_arguments(geqrf_batched_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class geqrf_batched_gtest_bad_arg : public ::TestWithParam { protected: geqrf_batched_gtest_bad_arg() {} virtual ~geqrf_batched_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class geqrf_batched_gtest : public ::TestWithParam { protected: geqrf_batched_gtest() {} virtual ~geqrf_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(geqrf_batched_gtest_bad_arg, geqrf_batched_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_geqrf_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } TEST_P(geqrf_batched_gtest, geqrf_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_geqrf_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(geqrf_batched_gtest, geqrf_batched_gtest_double) { // GetParam returns a tuple. 
The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_geqrf_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(geqrf_batched_gtest, geqrf_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_geqrf_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(geqrf_batched_gtest, geqrf_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_geqrf_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGeqrfBatched, geqrf_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(hipblasGeqrfBatchedBadArg, geqrf_batched_gtest_bad_arg, Combine(ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/geqrf_gtest.cpp000066400000000000000000000142431434647641600215320ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_geqrf.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> geqrf_tuple; typedef std::tuple geqrf_bad_arg_tuple; const vector> matrix_size_range = {{10, 10, 10}, {10, 10, 20}, {600, 500, 600}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {1}; const vector is_fortran = {false, true}; Arguments setup_geqrf_arguments(geqrf_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class geqrf_gtest_bad_arg : public ::TestWithParam { protected: geqrf_gtest_bad_arg() {} virtual ~geqrf_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class geqrf_gtest : public ::TestWithParam { protected: geqrf_gtest() {} virtual ~geqrf_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(geqrf_gtest_bad_arg, geqrf_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_geqrf_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } TEST_P(geqrf_gtest, geqrf_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_arguments(GetParam()); hipblasStatus_t status = testing_geqrf(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geqrf_gtest, geqrf_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_arguments(GetParam()); hipblasStatus_t status = testing_geqrf(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geqrf_gtest, geqrf_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_arguments(GetParam()); hipblasStatus_t status = testing_geqrf(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geqrf_gtest, geqrf_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
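// [Clarifying note, not in the original source.] geqrf factors a column-major M x N
// matrix in place, so the only leading-dimension requirement exercised below is
// lda >= M; unlike gemm/gemv there is no transpose option to change that rule.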
Arguments arg = setup_geqrf_arguments(GetParam()); hipblasStatus_t status = testing_geqrf(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGeqrf, geqrf_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(hipblasGeqrfBadArg, geqrf_gtest_bad_arg, Combine(ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/geqrf_strided_batched_gtest.cpp000066400000000000000000000156661434647641600247340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_geqrf_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> geqrf_strided_batched_tuple; typedef std::tuple geqrf_strided_batched_bad_arg_tuple; const vector> matrix_size_range = {{10, 10, 10}, {10, 10, 20}, {600, 500, 600}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 2}; const vector is_fortran = {false, true}; Arguments setup_geqrf_strided_batched_arguments(geqrf_strided_batched_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class geqrf_strided_batched_gtest_bad_arg : public ::TestWithParam { protected: geqrf_strided_batched_gtest_bad_arg() {} virtual ~geqrf_strided_batched_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class geqrf_strided_batched_gtest : public ::TestWithParam { protected: geqrf_strided_batched_gtest() {} virtual ~geqrf_strided_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(geqrf_strided_batched_gtest_bad_arg, geqrf_strided_batched_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_geqrf_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_geqrf_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } TEST_P(geqrf_strided_batched_gtest, geqrf_strided_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_geqrf_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geqrf_strided_batched_gtest, geqrf_strided_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_geqrf_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geqrf_strided_batched_gtest, geqrf_strided_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
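    // Note that batch_count_range above is {-1, 0, 2}, so some generated combinations are
    // deliberately degenerate or invalid; for example a hypothetical tuple
    // { {10, 10, 20}, 2.5, -1, false } yields arg.batch_count == -1, and the status check
    // below is then expected to take the HIPBLAS_STATUS_INVALID_VALUE branch.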
Arguments arg = setup_geqrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_geqrf_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(geqrf_strided_batched_gtest, geqrf_strided_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_geqrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_geqrf_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGeqrfStridedBatched, geqrf_strided_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(hipblasGeqrfStridedBatchedBadArg, geqrf_strided_batched_gtest_bad_arg, Combine(ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/ger_gtest.cpp000066400000000000000000000335341434647641600212070ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_ger.hpp" #include "testing_ger_batched.hpp" #include "testing_ger_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, double, int, bool> ger_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, {11, 11, 11}, {16, 16, 16}, {32, 32, 32}, {65, 65, 65} // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {-1, -1}, {0, -1}, {2, 1} // {10, 100} }; // vector, each entry is {alpha, alphai}; // add/delete single values, like {2.0, 1.0} const vector> alpha_range = {{-0.5, 1.5}, {2.0, 1.0}, {0.0, 0.0}}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 ger: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
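// For reference, the INSTANTIATE_TEST_SUITE_P at the bottom of this file forms the Cartesian
// product of the ranges above, so every TEST_P in this file runs once per combination:
// 5 matrix sizes x 3 {incx, incy} pairs x 3 alpha pairs x 2 stride scales x 5 batch counts
// x 2 fortran flags = 900 parameter sets.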
// Do not use std::tuple to directly pass parameters to testers // with std::tuple, you have to unpack it with extreme care for each one with something like "std::get<0>", which is // not intuitive and error-prone
Arguments setup_ger_arguments(ger_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; // see the comments about incx_incy_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.alpha = alpha[0]; arg.alphai = alpha[1]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; }
class blas2_ger_gtest : public ::TestWithParam { protected: blas2_ger_gtest() {} virtual ~blas2_ger_gtest() {} virtual void SetUp() {} virtual void TearDown() {} };
// ger
TEST_P(blas2_ger_gtest, ger_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to the testing routine. // The Arguments data structure has physical meaning associated with it, // while the tuple is non-intuitive. Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } }
TEST_P(blas2_ger_gtest, geru_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to the testing routine. // The Arguments data structure has physical meaning associated with it, // while the tuple is non-intuitive. Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } }
TEST_P(blas2_ger_gtest, gerc_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to the testing routine. // The Arguments data structure has physical meaning associated with it, // while the tuple is non-intuitive. Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } }
#ifndef __HIP_PLATFORM_NVCC__ // ger_batched
TEST_P(blas2_ger_gtest, ger_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to the testing routine. // The Arguments data structure has physical meaning associated with it, // while the tuple is non-intuitive.
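    // As a worked illustration (hypothetical values drawn from the ranges above, not from
    // GetParam()): the tuple { {32, 32, 32}, {2, 1}, {-0.5, 1.5}, 1.0, 2, false } would be
    // unpacked by setup_ger_arguments into arg.M == arg.N == arg.lda == 32, arg.incx == 2,
    // arg.incy == 1, arg.alpha == -0.5, arg.alphai == 1.5, arg.stride_scale == 1.0,
    // arg.batch_count == 2, arg.fortran == false.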
Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } }
TEST_P(blas2_ger_gtest, geru_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to the testing routine. // The Arguments data structure has physical meaning associated with it, // while the tuple is non-intuitive. Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } }
TEST_P(blas2_ger_gtest, gerc_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to the testing routine. // The Arguments data structure has physical meaning associated with it, // while the tuple is non-intuitive. Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } }
// ger_strided_batched
TEST_P(blas2_ger_gtest, ger_strided_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to the testing routine. // The Arguments data structure has physical meaning associated with it, // while the tuple is non-intuitive. Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } }
TEST_P(blas2_ger_gtest, geru_strided_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to the testing routine. // The Arguments data structure has physical meaning associated with it, // while the tuple is non-intuitive.
Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_ger_gtest, gerc_strided_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_ger_arguments(GetParam()); hipblasStatus_t status = testing_ger_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasGer, blas2_ger_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/getrf_batched_gtest.cpp000066400000000000000000000206401434647641600232050ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_getrf_batched.hpp" #include "testing_getrf_npvt_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> getrf_batched_tuple; const vector> matrix_size_range = {{-1, -1, 1, 1}, {10, 10, 10, 10}, {10, 10, 20, 100}, {600, 500, 600, 600}, {1024, 1024, 1024, 1024}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 1, 2}; const vector is_fortran = {false, true}; Arguments setup_getrf_batched_arguments(getrf_batched_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; //arg.ldb = matrix_size[3]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class getrf_batched_gtest : public ::TestWithParam { protected: getrf_batched_gtest() {} virtual ~getrf_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(getrf_batched_gtest, getrf_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrf_batched_gtest, getrf_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrf_batched_gtest, getrf_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrf_batched_gtest, getrf_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrf_batched_gtest, getrf_npvt_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
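    // For illustration (hypothetical values from the ranges above): the size vectors in this
    // file have four entries, e.g. {600, 500, 600, 600}, but setup_getrf_batched_arguments
    // only consumes the first three (M, N, lda); the fourth (ldb) assignment is commented
    // out, so { {600, 500, 600, 600}, 2.5, 2, false } maps to arg.M == 600, arg.N == 500,
    // arg.lda == 600, arg.batch_count == 2.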
Arguments arg = setup_getrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrf_batched_gtest, getrf_npvt_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrf_batched_gtest, getrf_npvt_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrf_batched_gtest, getrf_npvt_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGetrfBatched, getrf_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/getrf_gtest.cpp000066400000000000000000000177561434647641600215510ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_getrf.hpp" #include "testing_getrf_npvt.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> getrf_tuple; const vector> matrix_size_range = {{-1, -1, 1, 1}, {10, 10, 10, 10}, {10, 10, 20, 100}, {600, 500, 600, 600}, {1024, 1024, 1024, 1024}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {1}; const vector is_fortran = {false, true}; Arguments setup_getrf_arguments(getrf_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; //arg.ldb = matrix_size[3]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class getrf_gtest : public ::TestWithParam { protected: getrf_gtest() {} virtual ~getrf_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(getrf_gtest, getrf_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_arguments(GetParam()); hipblasStatus_t status = testing_getrf(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_gtest, getrf_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_arguments(GetParam()); hipblasStatus_t status = testing_getrf(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_gtest, getrf_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_arguments(GetParam()); hipblasStatus_t status = testing_getrf(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_gtest, getrf_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_arguments(GetParam()); hipblasStatus_t status = testing_getrf(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_gtest, getrf_npvt_gtest_float) { // GetParam returns a tuple. 
The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_gtest, getrf_npvt_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_gtest, getrf_npvt_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_gtest, getrf_npvt_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGetrf, getrf_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/getrf_strided_batched_gtest.cpp000066400000000000000000000215361434647641600247300ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_getrf_npvt_strided_batched.hpp" #include "testing_getrf_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> getrf_strided_batched_tuple; const vector> matrix_size_range = {{-1, -1, 1, 1}, {10, 10, 10, 10}, {10, 10, 20, 100}, {600, 500, 600, 600}, {1024, 1024, 1024, 1024}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 1, 2}; const vector is_fortran = {false, true}; Arguments setup_getrf_strided_batched_arguments(getrf_strided_batched_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; //arg.ldb = matrix_size[3]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class getrf_strided_batched_gtest : public ::TestWithParam { protected: getrf_strided_batched_gtest() {} virtual ~getrf_strided_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(getrf_strided_batched_gtest, getrf_strided_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_strided_batched_gtest, getrf_strided_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_strided_batched_gtest, getrf_strided_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_strided_batched_gtest, getrf_strided_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
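    // The first entry of matrix_size_range, {-1, -1, 1, 1}, appears to be included on
    // purpose as an invalid size: it produces arg.N == -1, so the check below should land
    // in the HIPBLAS_STATUS_INVALID_VALUE branch rather than the success branch.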
Arguments arg = setup_getrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_strided_batched_gtest, getrf_npvt_strided_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_strided_batched_gtest, getrf_npvt_strided_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_strided_batched_gtest, getrf_npvt_strided_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrf_strided_batched_gtest, getrf_npvt_strided_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrf_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrf_npvt_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGetrfStridedBatched, getrf_strided_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/getri_batched_gtest.cpp000066400000000000000000000201741434647641600232120ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_getri_batched.hpp" #include "testing_getri_npvt_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> getri_batched_tuple; const vector> matrix_size_range = {{-1, 1}, {10, 10}, {10, 20}, {500, 600}, {1024, 1024}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 1, 2}; const vector is_fortran = {false, true}; Arguments setup_getri_batched_arguments(getri_batched_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.N = matrix_size[0]; arg.lda = matrix_size[1]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class getri_batched_gtest : public ::TestWithParam { protected: getri_batched_gtest() {} virtual ~getri_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(getri_batched_gtest, getri_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getri_batched_arguments(GetParam()); hipblasStatus_t status = testing_getri_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getri_batched_gtest, getri_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getri_batched_arguments(GetParam()); hipblasStatus_t status = testing_getri_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getri_batched_gtest, getri_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
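    // Unlike the getrf testers, this suite only carries {N, lda} in its size vectors; for
    // example a hypothetical tuple { {500, 600}, 2.5, 2, false } unpacks to arg.N == 500,
    // arg.lda == 600, arg.batch_count == 2 (this tester does not set arg.M or arg.ldb).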
Arguments arg = setup_getri_batched_arguments(GetParam()); hipblasStatus_t status = testing_getri_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getri_batched_gtest, getri_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getri_batched_arguments(GetParam()); hipblasStatus_t status = testing_getri_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getri_batched_gtest, getri_npvt_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getri_batched_arguments(GetParam()); hipblasStatus_t status = testing_getri_npvt_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getri_batched_gtest, getri_npvt_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getri_batched_arguments(GetParam()); hipblasStatus_t status = testing_getri_npvt_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getri_batched_gtest, getri_npvt_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getri_batched_arguments(GetParam()); hipblasStatus_t status = testing_getri_npvt_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getri_batched_gtest, getri_npvt_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
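    // The *_npvt_* tests exercise what appears to be the no-pivot variant of the routine
    // (testing_getri_npvt_batched vs. testing_getri_batched); both receive the same
    // Arguments from setup_getri_batched_arguments and use the same expected-status logic.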
Arguments arg = setup_getri_batched_arguments(GetParam()); hipblasStatus_t status = testing_getri_npvt_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {M, N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGetriBatched, getri_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/getrs_batched_gtest.cpp000066400000000000000000000151251434647641600232240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_getrs_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> getrs_batched_tuple; typedef std::tuple getrs_batched_bad_arg_tuple; const vector> matrix_size_range = {{-1, 1, 1}, {10, 20, 100}, {500, 600, 600}, {1024, 1024, 1024}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 1, 2}; const vector is_fortran = {false, true}; Arguments setup_getrs_batched_arguments(getrs_batched_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.N = matrix_size[0]; arg.lda = matrix_size[1]; arg.ldb = matrix_size[2]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class getrs_batched_gtest_bad_arg : public ::TestWithParam { protected: getrs_batched_gtest_bad_arg() {} virtual ~getrs_batched_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class getrs_batched_gtest : public ::TestWithParam { protected: getrs_batched_gtest() {} virtual ~getrs_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(getrs_batched_gtest_bad_arg, getrs_batched_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_getrs_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } TEST_P(getrs_batched_gtest, getrs_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrs_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrs_batched_gtest, getrs_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrs_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrs_batched_gtest, getrs_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrs_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } TEST_P(getrs_batched_gtest, getrs_batched_gtest_double_complex) { // GetParam returns a tuple. 
The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrs_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGetrsBatched, getrs_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(hipblasGetrsBatchedBadArg, getrs_batched_gtest_bad_arg, Combine(ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/getrs_gtest.cpp000066400000000000000000000143171434647641600215540ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_getrs.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> getrs_tuple; typedef std::tuple getrs_bad_arg_tuple; const vector> matrix_size_range = {{-1, 1, 1}, {10, 20, 100}, {500, 600, 600}, {1024, 1024, 1024}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {1}; const vector is_fortran = {false, true}; Arguments setup_getrs_arguments(getrs_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.N = matrix_size[0]; arg.lda = matrix_size[1]; arg.ldb = matrix_size[2]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class getrs_gtest_bad_arg : public ::TestWithParam { protected: getrs_gtest_bad_arg() {} virtual ~getrs_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class getrs_gtest : public ::TestWithParam { protected: getrs_gtest() {} virtual ~getrs_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(getrs_gtest_bad_arg, getrs_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_getrs_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } TEST_P(getrs_gtest, getrs_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_arguments(GetParam()); hipblasStatus_t status = testing_getrs(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrs_gtest, getrs_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_arguments(GetParam()); hipblasStatus_t status = testing_getrs(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrs_gtest, getrs_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_arguments(GetParam()); hipblasStatus_t status = testing_getrs(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrs_gtest, getrs_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
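    // Compared with the getrf/getri testers, the size vectors here carry {N, lda, ldb};
    // e.g. a hypothetical tuple { {500, 600, 600}, 2.5, 1, false } gives arg.N == 500,
    // arg.lda == 600, arg.ldb == 600, and the status check below additionally requires
    // ldb >= N for a valid call.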
Arguments arg = setup_getrs_arguments(GetParam()); hipblasStatus_t status = testing_getrs(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGetrs, getrs_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(hipblasGetrsBadArg, getrs_gtest_bad_arg, Combine(ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/getrs_strided_batched_gtest.cpp000066400000000000000000000157451434647641600247520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_getrs_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, double, int, bool> getrs_strided_batched_tuple; typedef std::tuple getrs_strided_batched_bad_arg_tuple; const vector> matrix_size_range = {{-1, 1, 1}, {10, 20, 100}, {500, 600, 600}, {1024, 1024, 1024}}; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 1, 2}; const vector is_fortran = {false, true}; Arguments setup_getrs_strided_batched_arguments(getrs_strided_batched_tuple tup) { vector matrix_size = std::get<0>(tup); double stride_scale = std::get<1>(tup); int batch_count = std::get<2>(tup); bool fortran = std::get<3>(tup); Arguments arg; arg.N = matrix_size[0]; arg.lda = matrix_size[1]; arg.ldb = matrix_size[2]; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class getrs_strided_batched_gtest_bad_arg : public ::TestWithParam { protected: getrs_strided_batched_gtest_bad_arg() {} virtual ~getrs_strided_batched_gtest_bad_arg() {} virtual void SetUp() {} virtual void TearDown() {} }; class getrs_strided_batched_gtest : public ::TestWithParam { protected: getrs_strided_batched_gtest() {} virtual ~getrs_strided_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(getrs_strided_batched_gtest_bad_arg, getrs_strided_batched_gtest_bad_arg_test) { Arguments arg; EXPECT_EQ(testing_getrs_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(testing_getrs_strided_batched_bad_arg(arg), HIPBLAS_STATUS_SUCCESS); } TEST_P(getrs_strided_batched_gtest, getrs_strided_batched_gtest_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrs_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrs_strided_batched_gtest, getrs_strided_batched_gtest_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrs_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrs_strided_batched_gtest, getrs_strided_batched_gtest_float_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. 
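    // [Editorial sketch, not part of the original test] The expectation coded below,
    // restated as a hypothetical predicate over the Arguments: a negative N or
    // batch_count, or an lda/ldb smaller than N, is expected to be reported as
    // HIPBLAS_STATUS_INVALID_VALUE by the testing routine.
    auto expect_invalid_size = [](const Arguments& a) {
        return a.N < 0 || a.lda < a.N || a.ldb < a.N || a.batch_count < 0;
    };
    (void)expect_invalid_size;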
Arguments arg = setup_getrs_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrs_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(getrs_strided_batched_gtest, getrs_strided_batched_gtest_double_complex) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg(Arguments), which will be passed to testing routine. Arguments arg = setup_getrs_strided_batched_arguments(GetParam()); hipblasStatus_t status = testing_getrs_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.ldb < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a vector, // ValuesIn takes each element (a vector), combines them, and feeds them to test_p // The combinations are { {N, lda, ldb}, stride_scale, batch_count } INSTANTIATE_TEST_SUITE_P(hipblasGetrsStridedBatched, getrs_strided_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); INSTANTIATE_TEST_SUITE_P(hipblasGetrsStridedBatchedBadArg, getrs_strided_batched_gtest_bad_arg, Combine(ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/hbmv_gtest.cpp000066400000000000000000000205321434647641600213600ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_hbmv.hpp" #include "testing_hbmv_batched.hpp" #include "testing_hbmv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, char, double, int, bool> hbmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda, K}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, // {10, 10, 2}, // {600,500, 500}, {1000, 1000, 501}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {0, -1}, {-1, -1}, // {10, 100}, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {1.0, 0.0}, {-1.0, -1.0}, {2.0, 1.0}, {0.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // shbmv/dhbmv, const vector transA_range = { 'N', 'T', 'C', }; const vector stride_scale_range = {2.0}; const vector batch_count_range = {-1, 0, 2}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 hbmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 HBMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
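// [Editorial sketch, not part of the original file] A hypothetical helper showing the
// std::get access pattern discussed in these comments, assuming the hbmv_tuple layout
// { sizes, {incx,incy}, {alpha,beta}, transA, stride_scale, batch_count, fortran }
// declared above; setup_hbmv_arguments below does the same element-by-element
// unpacking into an Arguments object.
inline int hbmv_tuple_batch_count(const hbmv_tuple& tup)
{
    return std::get<5>(tup); // batch_count sits at index 5 of the tuple
}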
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_hbmv_arguments(hbmv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); char transA = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.lda = matrix_size[1]; arg.K = matrix_size[2]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; return arg; } class hbmv_gtest : public ::TestWithParam { protected: hbmv_gtest() {} virtual ~hbmv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(hbmv_gtest, hbmv_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hbmv_arguments(GetParam()); hipblasStatus_t status = testing_hbmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(hbmv_gtest, hbmv_gtest_batched_float_complex) { Arguments arg = setup_hbmv_arguments(GetParam()); hipblasStatus_t status = testing_hbmv_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(hbmv_gtest, hbmv_gtest_strided_batched_float_complex) { Arguments arg = setup_hbmv_arguments(GetParam()); hipblasStatus_t status = testing_hbmv_strided_batched(arg); if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha, beta}, {transA} } INSTANTIATE_TEST_SUITE_P(hipblasHbmv, hbmv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/hemm_gtest.cpp000066400000000000000000000211211434647641600213450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_hemm.hpp" #include "testing_hemm_batched.hpp" #include "testing_hemm_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, double, int> hemm_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda, ldb, ldc}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, 1, 1, 1}, {10, 10, 20, 100, 100}, {600, 500, 600, 600, 700}, }; const vector> alpha_beta_range = {{-5.0, 2.0, 3.0, -2.0}}; // vector of vector, each pair is a {side, uplo}; // side has two option "Lefe (L), Right (R)" // uplo has two "Lower (L), Upper (U)" // Each letter is capitalizied, e.g. do not use 'l', but use 'L' instead. 
const vector> side_uplo_range = { {'L', 'L'}, {'R', 'L'}, {'L', 'U'}, {'R', 'U'}, }; const vector stride_scale_range = {2}; const vector batch_count_range = {2}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 hemm: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_hemm_arguments(hemm_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); vector side_uplo = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.ldc = matrix_size[4]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.side = side_uplo[0]; arg.uplo = side_uplo[1]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; return arg; } class hemm_gtest : public ::TestWithParam { protected: hemm_gtest() {} virtual ~hemm_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(hemm_gtest, hemm_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hemm_arguments(GetParam()); hipblasStatus_t status = testing_hemm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(hemm_gtest, hemm_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hemm_arguments(GetParam()); hipblasStatus_t status = testing_hemm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(hemm_gtest, hemm_strided_batched_gtest_double_complex) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hemm_arguments(GetParam()); hipblasStatus_t status = testing_hemm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda, ldb}, alpha, {side, diag} } INSTANTIATE_TEST_SUITE_P(hipblashemm_matrix_size, hemm_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(side_uplo_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range))); hipBLAS-rocm-5.5.1/clients/gtest/hemv_batched_gtest.cpp000066400000000000000000000152521434647641600230400ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_hemv_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, char, int, bool> hemv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". 
case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1}, {1000, 1000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {-1, -1}, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {2.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // shemv/dhemv, const vector transA_range = { 'N', 'T', // 'C', }; // number of hemvs in batched hemv const vector batch_count_range = { -1, 0, 2, }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 hemv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 HEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_hemv_arguments(hemv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); char transA = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.batch_count = batch_count; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class hemv_batched_gtest : public ::TestWithParam { protected: hemv_batched_gtest() {} virtual ~hemv_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(hemv_batched_gtest, hemv_gtest_float_complex) { Arguments arg = setup_hemv_arguments(GetParam()); hipblasStatus_t status = testing_hemv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.lda < 1 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha, beta}, 
{transA}, {batch_count} } INSTANTIATE_TEST_SUITE_P(hipblasHemvBatched, hemv_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/hemv_gtest.cpp000066400000000000000000000154221434647641600213650ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_hemv.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, char, bool> hemv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1}, // {10, 10, 2}, // {600,500, 500}, {1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {0, -1}, {-1, -1}, // {10, 100}, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {1.0, 0.0}, {-1.0, -1.0}, {2.0, 1.0}, {0.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // shemv/dhemv, const vector transA_range = { 'N', 'T', 'C', }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 hemv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 HEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_hemv_arguments(hemv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); char transA = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class hemv_gtest : public ::TestWithParam { protected: hemv_gtest() {} virtual ~hemv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(hemv_gtest, hemv_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
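    // [Editorial sketch, not part of the original test] The check below as a
    // hypothetical predicate: hemv is expected to reject a negative N, an lda below
    // max(1, N), or a zero increment with HIPBLAS_STATUS_INVALID_VALUE.
    auto expect_invalid_size = [](const Arguments& a) {
        return a.N < 0 || a.lda < a.N || a.lda < 1 || a.incx == 0 || a.incy == 0;
    };
    (void)expect_invalid_size;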
Arguments arg = setup_hemv_arguments(GetParam()); hipblasStatus_t status = testing_hemv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.lda < 1 || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha, beta}, {transA} } INSTANTIATE_TEST_SUITE_P(hipblasHemv, hemv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/hemv_strided_batched_gtest.cpp000066400000000000000000000162571434647641600245640ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_hemv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, vector, char, int, bool> hemv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1}, {1000, 1000}, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {-1, -1}, }; // a vector of single double values. This value will be multiplied by // appropriate dimensions to get the stride between vectors and matrices const vector stride_scale_range = { 1, 1.5, 2, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {2.0, 1.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // shemv/dhemv, const vector transA_range = { 'N', 'T', // 'C', }; // number of gemms in batched gemm const vector batch_count_range = { -1, 0, 2, }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 hemv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 HEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_hemv_arguments(hemv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); double stride_scale = std::get<2>(tup); vector alpha_beta = std::get<3>(tup); char transA = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; // see the comments about stride_scale above arg.stride_scale = stride_scale; arg.batch_count = batch_count; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.beta = alpha_beta[1]; arg.transA = transA; arg.fortran = fortran; arg.timing = 0; return arg; } class hemv_strided_batched_gtest : public ::TestWithParam { protected: hemv_strided_batched_gtest() {} virtual ~hemv_strided_batched_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(hemv_strided_batched_gtest, hemv_gtest_float_complex) { Arguments arg = setup_hemv_arguments(GetParam()); hipblasStatus_t status = testing_hemv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.lda < 1 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of 
vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {stride_scale}, {alpha, beta}, {transA}, {batch_count} } INSTANTIATE_TEST_SUITE_P(hipblasHemvStridedBatched, hemv_strided_batched_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/her2_gtest.cpp000066400000000000000000000261271434647641600212720ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_her2.hpp" #include "testing_her2_batched.hpp" #include "testing_her2_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, char, double, int, bool> her2_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda}; // add/delete as a group const vector> matrix_size_range = {{-1, -1}, {11, 11}, {16, 16}, {32, 32}, {65, 65}}; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = {{2, 1}, {0, 0}, {-1, -1}}; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector> alpha_range = {{-0.5, 1.5}, {2.0, -1.0}, {0.0, 0.0}}; const vector uplo_range = { 'L', 'U', }; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 her2: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_her2_arguments(her2_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha = std::get<2>(tup); char uplo = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.alpha = alpha[0]; arg.alphai = alpha[1]; arg.timing = 0; arg.uplo = uplo; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_her2_gtest : public ::TestWithParam { protected: blas2_her2_gtest() {} virtual ~blas2_her2_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // her2 TEST_P(blas2_her2_gtest, her2_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2_arguments(GetParam()); hipblasStatus_t status = testing_her2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_her2_gtest, her2_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
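    // [Editorial sketch, not part of the original test] The equivalent direct reads
    // with std::get, assuming the her2_tuple layout { sizes, {incx,incy}, alpha, uplo,
    // stride_scale, batch_count, fortran } declared above; setup_her2_arguments wraps
    // exactly this unpacking into an Arguments object.
    {
        const her2_tuple&  tup  = GetParam();
        const vector<int>& size = std::get<0>(tup); // {N, lda}
        const char         uplo = std::get<3>(tup); // 'L' or 'U'
        (void)size;
        (void)uplo;
    }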
Arguments arg = setup_her2_arguments(GetParam()); hipblasStatus_t status = testing_her2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // her2_batched TEST_P(blas2_her2_gtest, her2_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2_arguments(GetParam()); hipblasStatus_t status = testing_her2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_her2_gtest, her2_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2_arguments(GetParam()); hipblasStatus_t status = testing_her2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // her2_strided_batched TEST_P(blas2_her2_gtest, her2_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2_arguments(GetParam()); hipblasStatus_t status = testing_her2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_her2_gtest, her2_strided_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
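    // [Editorial sketch, not part of the original test] For the strided-batched
    // variant, the expectation below also treats a negative batch_count as an invalid
    // argument; restated as a hypothetical predicate:
    auto expect_invalid_size = [](const Arguments& a) {
        return a.N < 0 || a.lda < a.N || a.incx == 0 || a.incy == 0 || a.batch_count < 0;
    };
    (void)expect_invalid_size;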
Arguments arg = setup_her2_arguments(GetParam()); hipblasStatus_t status = testing_her2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasHer2, blas2_her2_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_range), ValuesIn(uplo_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/her2k_gtest.cpp000066400000000000000000000302061434647641600214360ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_her2k.hpp" #include "testing_her2k_batched.hpp" #include "testing_her2k_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, char, char, double, int, bool> her2k_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, K, lda, ldb, ldc}; // add/delete as a group const vector> matrix_size_range = {{-1, -1, -1, -1, -1}, {11, 6, 11, 11, 11}, {16, 15, 16, 16, 16}, {32, 12, 32, 32, 32}, {65, 4, 65, 65, 65}}; // vector, each entry is {alpha, alphai, beta, betai}; // add/delete single values, like {2.0} const vector> alpha_beta_range = {{-0.5, 1.5, 2.0, 1.5}, {2.0, 1.0, 2.0, 1.0}, {0.0, 0.0, 0.0, 0.0}}; const vector uplo_range = { 'L', 'U', }; const vector transA_range = {'N', 'C'}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 her2k: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_her2k_arguments(her2k_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); char uplo = std::get<2>(tup); char transA = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.ldc = matrix_size[4]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.uplo = uplo; arg.transA = transA; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas3_her2k_gtest : public ::TestWithParam { protected: blas3_her2k_gtest() {} virtual ~blas3_her2k_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // her2k TEST_P(blas3_her2k_gtest, her2k_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2k_arguments(GetParam()); hipblasStatus_t status = testing_her2k(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K))) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_her2k_gtest, her2k_gtest_double) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2k_arguments(GetParam()); hipblasStatus_t status = testing_her2k(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K))) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // her2k_batched TEST_P(blas3_her2k_gtest, her2k_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2k_arguments(GetParam()); hipblasStatus_t status = testing_her2k_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_her2k_gtest, her2k_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2k_arguments(GetParam()); hipblasStatus_t status = testing_her2k_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // her2k_strided_batched TEST_P(blas3_her2k_gtest, her2k_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2k_arguments(GetParam()); hipblasStatus_t status = testing_her2k_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_her2k_gtest, her2k_strided_batched_gtest_double) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her2k_arguments(GetParam()); hipblasStatus_t status = testing_her2k_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasHer2k, blas3_her2k_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(uplo_range), ValuesIn(transA_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/her_gtest.cpp000066400000000000000000000255101434647641600212030ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_her.hpp" #include "testing_her_batched.hpp" #include "testing_her_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, char, double, int, bool> her_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda}; // add/delete as a group const vector> matrix_size_range = {{-1, -1}, {11, 11}, {16, 16}, {32, 32}, {65, 65}}; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = {{-1}, {0}, {2}}; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector alpha_range = {-0.5, 2.0, 0.0}; const vector uplo_range = { 'L', 'U', }; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 her: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
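// ---------------------------------------------------------------------------
// Illustrative sketch only (not part of the original suite): the warning just
// below explains why the raw her_tuple is not handed straight to the templated
// testers.  Unpacked by hand, every call site has to remember which tuple
// index means what (example_* name is hypothetical):
inline void example_manual_her_unpack(const her_tuple& tup)
{
    auto matrix_size = std::get<0>(tup); // {N, lda}
    auto alpha       = std::get<2>(tup); // easy to confuse with std::get<4> (stride_scale)
    auto uplo        = std::get<3>(tup);
    (void)matrix_size;
    (void)alpha;
    (void)uplo;
    // ... and so on for every member; positional access is error-prone.
}
// setup_her_arguments() below does this translation once and hands the testers
// a named Arguments struct (arg.N, arg.lda, arg.incx, ...) instead.
// ---------------------------------------------------------------------------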
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_her_arguments(her_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); double alpha = std::get<2>(tup); char uplo = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.alpha = alpha; arg.timing = 0; arg.uplo = uplo; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_her_gtest : public ::TestWithParam { protected: blas2_her_gtest() {} virtual ~blas2_her_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // her TEST_P(blas2_her_gtest, her_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her_arguments(GetParam()); hipblasStatus_t status = testing_her(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_her_gtest, her_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her_arguments(GetParam()); hipblasStatus_t status = testing_her(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // her_batched TEST_P(blas2_her_gtest, her_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her_arguments(GetParam()); hipblasStatus_t status = testing_her_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_her_gtest, her_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_her_arguments(GetParam()); hipblasStatus_t status = testing_her_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // her_strided_batched TEST_P(blas2_her_gtest, her_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her_arguments(GetParam()); hipblasStatus_t status = testing_her_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_her_gtest, her_strided_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_her_arguments(GetParam()); hipblasStatus_t status = testing_her_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasHer, blas2_her_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_range), ValuesIn(uplo_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/herk_gtest.cpp000066400000000000000000000270761434647641600213670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_herk.hpp" #include "testing_herk_batched.hpp" #include "testing_herk_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, char, char, double, int, bool> herk_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, K, lda, ldc}; // add/delete as a group const vector> matrix_size_range = {{-1, -1, -1, -1}, {11, 6, 11, 11}, {16, 15, 16, 16}, {32, 12, 32, 32}, {65, 4, 65, 65}}; // vector, each entry is {alpha, alphai, beta, betai}; // add/delete single values, like {2.0} const vector> alpha_beta_range = {{-0.5, 1.5, 2.0, 1.5}, {2.0, 1.0, 2.0, 1.0}, {0.0, 0.0, 0.0, 0.0}}; const vector uplo_range = { 'L', 'U', }; const vector transA_range = {'N', 'C'}; //T', 'C' }; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 herk: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
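// ---------------------------------------------------------------------------
// Illustrative sketch only (not part of the original suite): Combine() with
// ValuesIn() takes the Cartesian product of the value ranges declared above,
// so each TEST_P in this file is instantiated once per combination.  With the
// ranges as declared (5 matrix sizes x 3 alpha/beta sets x 2 uplo x 2 transA
// x 2 stride scales x 5 batch counts x 2 Fortran flags) that is 1200 cases
// per tester (example_* name is hypothetical):
inline size_t example_herk_combination_count()
{
    return matrix_size_range.size() * alpha_beta_range.size() * uplo_range.size()
           * transA_range.size() * stride_scale_range.size() * batch_count_range.size()
           * (sizeof(is_fortran) / sizeof(is_fortran[0]));
}
// ---------------------------------------------------------------------------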
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_herk_arguments(herk_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); char uplo = std::get<2>(tup); char transA = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldc = matrix_size[3]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.uplo = uplo; arg.transA = transA; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas3_herk_gtest : public ::TestWithParam { protected: blas3_herk_gtest() {} virtual ~blas3_herk_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // herk TEST_P(blas3_herk_gtest, herk_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herk_arguments(GetParam()); hipblasStatus_t status = testing_herk(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_herk_gtest, herk_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herk_arguments(GetParam()); hipblasStatus_t status = testing_herk(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // herk_batched TEST_P(blas3_herk_gtest, herk_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herk_arguments(GetParam()); hipblasStatus_t status = testing_herk_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_herk_gtest, herk_batched_gtest_double) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herk_arguments(GetParam()); hipblasStatus_t status = testing_herk_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // herk_strided_batched TEST_P(blas3_herk_gtest, herk_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herk_arguments(GetParam()); hipblasStatus_t status = testing_herk_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_herk_gtest, herk_strided_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herk_arguments(GetParam()); hipblasStatus_t status = testing_herk_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasHerk, blas3_herk_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(uplo_range), ValuesIn(transA_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/herkx_gtest.cpp000066400000000000000000000302061434647641600215440ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_herkx.hpp" #include "testing_herkx_batched.hpp" #include "testing_herkx_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, char, char, double, int, bool> herkx_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, K, lda, ldb, ldc}; // add/delete as a group const vector> matrix_size_range = {{-1, -1, -1, -1, -1}, {11, 6, 11, 11, 11}, {16, 15, 16, 16, 16}, {32, 12, 32, 32, 32}, {65, 4, 65, 65, 65}}; // vector, each entry is {alpha, alphai, beta, betai}; // add/delete single values, like {2.0} const vector> alpha_beta_range = {{-0.5, 1.5, 2.0, 1.5}, {2.0, 1.0, 2.0, 1.0}, {0.0, 0.0, 0.0, 0.0}}; const vector uplo_range = { 'L', 'U', }; const vector transA_range = {'N', 'C'}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 herkx: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_herkx_arguments(herkx_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); char uplo = std::get<2>(tup); char transA = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.ldc = matrix_size[4]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.uplo = uplo; arg.transA = transA; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas3_herkx_gtest : public ::TestWithParam { protected: blas3_herkx_gtest() {} virtual ~blas3_herkx_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // herkx TEST_P(blas3_herkx_gtest, herkx_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herkx_arguments(GetParam()); hipblasStatus_t status = testing_herkx(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K))) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_herkx_gtest, herkx_gtest_double) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herkx_arguments(GetParam()); hipblasStatus_t status = testing_herkx(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K))) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // herkx_batched TEST_P(blas3_herkx_gtest, herkx_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herkx_arguments(GetParam()); hipblasStatus_t status = testing_herkx_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_herkx_gtest, herkx_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herkx_arguments(GetParam()); hipblasStatus_t status = testing_herkx_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // herkx_strided_batched TEST_P(blas3_herkx_gtest, herkx_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herkx_arguments(GetParam()); hipblasStatus_t status = testing_herkx_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_herkx_gtest, herkx_strided_batched_gtest_double) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_herkx_arguments(GetParam()); hipblasStatus_t status = testing_herkx_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasHerkx, blas3_herkx_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(uplo_range), ValuesIn(transA_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/hipblas_gtest_main.cpp000066400000000000000000000124461434647641600230570ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "program_options.hpp" #include "argument_model.hpp" #include "hipblas_data.hpp" #include "hipblas_parse_data.hpp" #include "hipblas_test.hpp" #include #include #include #include #include #include "clients_common.hpp" #include "utility.h" #define STRINGIFY(s) STRINGIFY_HELPER(s) #define STRINGIFY_HELPER(s) #s #if defined(GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST) #define HIPBLAS_ALLOW_UNINSTANTIATED_GTEST(testclass) \ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(testclass); #else #define HIPBLAS_ALLOW_UNINSTANTIATED_GTEST(testclass) #endif #define INSTANTIATE_TEST_CATEGORY(testclass, category) \ HIPBLAS_ALLOW_UNINSTANTIATED_GTEST(testclass) \ INSTANTIATE_TEST_SUITE_P( \ category, \ testclass, \ testing::ValuesIn(HipBLAS_TestData::begin([](const Arguments& arg) { return true; }), \ HipBLAS_TestData::end()), \ testclass::PrintToStringParamName()); struct data_driven : public testing::TestWithParam { virtual void TestBody() {} void operator()(const Arguments& arg) { run_bench_test(const_cast(arg), 1, 0); } struct PrintToStringParamName { std::string operator()(const testing::TestParamInfo& info) const { std::string name(info.param.category); get_test_name(info.param, name); // random trailer used as logged Arguments may not be unique char buf[256]; sprintf(buf, "_%d", rand()); name += buf; return name; } }; }; TEST_P(data_driven, yaml) { return data_driven()(GetParam()); } INSTANTIATE_TEST_CATEGORY(data_driven, _); static void print_version_info() { // clang-format off std::cout << "hipBLAS version " STRINGIFY(hipblasVersionMajor) "." STRINGIFY(hipblasVersionMinor) "." STRINGIFY(hipblasVersionPatch) "." STRINGIFY(hipblasVersionTweak) << std::endl; // clang-format on } int hipblas_test_datafile() { int ret = 0; for(Arguments arg : HipBLAS_TestData()) ret |= run_bench_test(arg, 1, 0); test_cleanup::cleanup(); return ret; } /* ===================================================================== Main function: =================================================================== */ int main(int argc, char** argv) { print_version_info(); // print device info int device_count = query_device_property(); if(device_count <= 0) { std::cerr << "Error: No devices found" << std::endl; return EXIT_FAILURE; } set_device(0); // use first device bool datafile = hipblas_parse_data(argc, argv); ::testing::InitGoogleTest(&argc, argv); int status = 0; if(!datafile) status = RUN_ALL_TESTS(); else { // remove standard non-yaml based gtests defined with explicit code. This depends // on the GTEST name convention, so for now internal tests must follow the // pattern INSTANTIATE_TEST_SUITE_P(*, *_gtest, *) to be filtered from yaml set // via this GTEST_FLAG line: ::testing::GTEST_FLAG(filter) = ::testing::GTEST_FLAG(filter) + "-*_gtest.*"; status = RUN_ALL_TESTS(); } print_version_info(); // redundant, but convenient when tests fail return status; } hipBLAS-rocm-5.5.1/clients/gtest/hpmv_gtest.cpp000066400000000000000000000215401434647641600213760ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_hpmv.hpp" #include "testing_hpmv_batched.hpp" #include "testing_hpmv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, char, double, int, bool> hpmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N}; // add/delete as a group const vector matrix_size_range = { -1, 1000, }; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = { {2, 1}, {0, -1}, {-1, -1}, // {10, 100}, }; // vector of vector, each pair is a {alpha, beta}; // add/delete this list in pairs, like {2.0, 4.0} const vector> alpha_beta_range = { {1.0, 0.0, 0.0, 0.0}, {-1.0, 1.5, -1.0, 2.0}, {2.0, -1.5, 1.0, 1.5}, {0.0, 0.0, 1.0, 0.0}, }; // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) internally in // shpmv/dhpmv, const vector transA_range = { 'N', 'T', 'C', }; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 2}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 hpmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 HPMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_hpmv_arguments(hpmv_tuple tup) { int matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); char transA = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; // the first element of alpha_beta_range is always alpha, and the second is always beta arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.transA = transA; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class hpmv_gtest : public ::TestWithParam { protected: hpmv_gtest() {} virtual ~hpmv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(hpmv_gtest, hpmv_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_hpmv_arguments(GetParam()); hipblasStatus_t status = testing_hpmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(hpmv_gtest, hpmv_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpmv_arguments(GetParam()); hipblasStatus_t status = testing_hpmv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(hpmv_gtest, hpmv_strided_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpmv_arguments(GetParam()); hipblasStatus_t status = testing_hpmv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M}, {incx,incy} {alpha, alphai, beta, betai}, {transA}, {stride_scale}, {batch_count} } INSTANTIATE_TEST_SUITE_P(hipblasHpmv, hpmv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(transA_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/hpr2_gtest.cpp000066400000000000000000000256171434647641600213100ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_hpr2.hpp" #include "testing_hpr2_batched.hpp" #include "testing_hpr2_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, char, double, int, bool> hpr2_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N}; // add/delete as a group const vector matrix_size_range = {-1, 11, 16, 32, 65}; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 1} const vector> incx_incy_range = {{2, 1}, {0, 0}, {-1, -1}}; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector> alpha_range = {{-0.5, 1.5}, {2.0, -1.0}, {0.0, 0.0}}; const vector uplo_range = { 'L', 'U', }; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 hpr2: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
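// ---------------------------------------------------------------------------
// Illustrative sketch only (not part of the original suite): each hpr2 tester
// below repeats the same branch that decides which status counts as a
// legitimate failure for a given argument set.  Factored out, the expectation
// would read like this (example_* name is hypothetical; the batched flag
// covers the *_batched and *_strided_batched variants):
inline hipblasStatus_t example_expected_hpr2_status(const Arguments& arg, bool batched)
{
    const bool invalid
        = arg.N < 0 || arg.incx == 0 || arg.incy == 0 || (batched && arg.batch_count < 0);
    return invalid ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS;
}
// ---------------------------------------------------------------------------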
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_hpr2_arguments(hpr2_tuple tup) { int matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha = std::get<2>(tup); char uplo = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.alpha = alpha[0]; arg.alphai = alpha[1]; arg.timing = 0; arg.uplo = uplo; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_hpr2_gtest : public ::TestWithParam { protected: blas2_hpr2_gtest() {} virtual ~blas2_hpr2_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // hpr2 TEST_P(blas2_hpr2_gtest, hpr2_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr2_arguments(GetParam()); hipblasStatus_t status = testing_hpr2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_hpr2_gtest, hpr2_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr2_arguments(GetParam()); hipblasStatus_t status = testing_hpr2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // hpr2_batched TEST_P(blas2_hpr2_gtest, hpr2_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr2_arguments(GetParam()); hipblasStatus_t status = testing_hpr2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_hpr2_gtest, hpr2_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_hpr2_arguments(GetParam()); hipblasStatus_t status = testing_hpr2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // hpr2_strided_batched TEST_P(blas2_hpr2_gtest, hpr2_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr2_arguments(GetParam()); hipblasStatus_t status = testing_hpr2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_hpr2_gtest, hpr2_strided_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr2_arguments(GetParam()); hipblasStatus_t status = testing_hpr2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasHpr2, blas2_hpr2_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_range), ValuesIn(uplo_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/hpr_gtest.cpp000066400000000000000000000243651434647641600212250ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_hpr.hpp" #include "testing_hpr_batched.hpp" #include "testing_hpr_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple hpr_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N}; // add/delete as a group const vector matrix_size_range = {-1, 11, 16, 32, 65}; // vector of vector, each pair is a {incx}; // add/delete this list in pairs, like {1} const vector incx_incy_range = {-1, 0, 2}; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector alpha_range = {-0.5, 2.0, 0.0}; const vector uplo_range = { 'L', 'U', }; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 hpr: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
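/* =====================================================================
    For orientation only: a rough sketch (not used by these tests) of the
    hipBLAS call the hpr testers ultimately exercise, and how the Arguments
    fields map onto it.  Device allocation, initialization, and error checks
    are omitted, and the single-precision-complex instantiation and the sizes
    are assumptions chosen purely for illustration.

        hipblasHandle_t handle;
        hipblasCreate(&handle);

        int   n     = 32;   // arg.N
        int   incx  = 1;    // arg.incx
        float alpha = 2.0f; // arg.alpha (real scalar for hpr)

        hipblasComplex* dx;  // device vector of n * incx elements
        hipblasComplex* dAP; // device packed Hermitian matrix, n * (n + 1) / 2 elements

        hipblasChpr(handle, HIPBLAS_FILL_MODE_UPPER, // arg.uplo == 'U'
                    n, &alpha, dx, incx, dAP);

        hipblasDestroy(handle);
   =================================================================== */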
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_hpr_arguments(hpr_tuple tup) { Arguments arg; arg.N = std::get<0>(tup); arg.incx = std::get<1>(tup); arg.alpha = std::get<2>(tup); arg.uplo = std::get<3>(tup); arg.stride_scale = std::get<4>(tup); arg.batch_count = std::get<5>(tup); arg.fortran = std::get<6>(tup); arg.timing = 0; return arg; } class blas2_hpr_gtest : public ::TestWithParam { protected: blas2_hpr_gtest() {} virtual ~blas2_hpr_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // hpr TEST_P(blas2_hpr_gtest, hpr_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr_arguments(GetParam()); hipblasStatus_t status = testing_hpr(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_hpr_gtest, hpr_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr_arguments(GetParam()); hipblasStatus_t status = testing_hpr(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // hpr_batched TEST_P(blas2_hpr_gtest, hpr_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr_arguments(GetParam()); hipblasStatus_t status = testing_hpr_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_hpr_gtest, hpr_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr_arguments(GetParam()); hipblasStatus_t status = testing_hpr_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // hpr_strided_batched TEST_P(blas2_hpr_gtest, hpr_strided_batched_gtest_float) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr_arguments(GetParam()); hipblasStatus_t status = testing_hpr_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_hpr_gtest, hpr_strided_batched_gtest_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_hpr_arguments(GetParam()); hipblasStatus_t status = testing_hpr_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasHpr, blas2_hpr_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_range), ValuesIn(uplo_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/nrm2_ex_gtest.cpp000066400000000000000000000173541434647641600220060ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_nrm2_batched_ex.hpp" #include "testing_nrm2_ex.hpp" #include "testing_nrm2_strided_batched_ex.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, bool> nrm2_ex_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* When you see this error, do not hack this source code, hack the Makefile. It is due to compilation. from 'testing::internal::CartesianProductHolder3, testing::internal::ParamGenerator >, testing::internal::ParamGenerator > >' to 'testing::internal::ParamGenerator >, std::vector > > >' */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ const int N_range[] = {-1, 10, 500, 1000, 7111}; const int incx_range[] = {1, -1}; const double stride_scale_range[] = {1.0, 2.5}; const int batch_count_range[] = {-1, 0, 1, 2, 10}; // All configs supported in rocBLAS and cuBLAS const vector> precisions{ {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_32F}, {HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F}, {HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F}, {HIPBLAS_C_32F, HIPBLAS_R_32F, HIPBLAS_R_32F}, {HIPBLAS_C_64F, HIPBLAS_R_64F, HIPBLAS_R_64F}, }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ class nrm2_ex_gtest : public ::TestWithParam { protected: nrm2_ex_gtest() {} virtual ~nrm2_ex_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; Arguments setup_nrm2_ex_arguments(nrm2_ex_tuple tup) { Arguments arg; arg.N = std::get<0>(tup); arg.incx = std::get<1>(tup); arg.stride_scale = std::get<2>(tup); arg.batch_count = std::get<3>(tup); vector precision_types = std::get<4>(tup); arg.fortran = std::get<5>(tup); arg.a_type = precision_types[0]; arg.b_type = precision_types[1]; arg.compute_type = precision_types[2]; arg.timing = 0; // disable timing data print out. Not supposed to collect performance data in gtest return arg; } // nrm2 tests TEST_P(nrm2_ex_gtest, nrm2_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_nrm2_ex_arguments(GetParam()); hipblasStatus_t status = testing_nrm2_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(nrm2_ex_gtest, nrm2_batched_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_nrm2_ex_arguments(GetParam()); hipblasStatus_t status = testing_nrm2_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx < 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(nrm2_ex_gtest, nrm2_strided_batched_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_nrm2_ex_arguments(GetParam()); hipblasStatus_t status = testing_nrm2_strided_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx < 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // Values is for a single item; ValuesIn is for an array // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p INSTANTIATE_TEST_SUITE_P(hipblasNrm2Ex, nrm2_ex_gtest, Combine(ValuesIn(N_range), ValuesIn(incx_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(precisions), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/rot_ex_gtest.cpp000066400000000000000000000202271434647641600217250ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_rot_batched_ex.hpp" #include "testing_rot_ex.hpp" #include "testing_rot_strided_batched_ex.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, double, int, vector, bool> rot_ex_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* When you see this error, do not hack this source code, hack the Makefile. It is due to compilation. from 'testing::internal::CartesianProductHolder3, testing::internal::ParamGenerator >, testing::internal::ParamGenerator > >' to 'testing::internal::ParamGenerator >, std::vector > > >' */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ const int N_range[] = {-1, 10, 500, 1000, 7111, 10000}; // vector of vector, each pair is a {incx, incy}; // add/delete this list in pairs, like {1, 2} // negative increments use absolute value for comparisons, so // some combinations may not work as expected. 
{-1, -1} as done // here is fine const vector> incx_incy_range = { {1, 1}, {-1, -1}, }; const double stride_scale_range[] = {1.0, 2.5}; const int batch_count_range[] = {-1, 0, 1, 2, 10}; // All configs supported in rocBLAS and cuBLAS const vector> precisions{ {HIPBLAS_R_16B, HIPBLAS_R_16B, HIPBLAS_R_16B, HIPBLAS_R_32F}, {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_32F}, {HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F}, {HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F}, {HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F}, {HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F}, {HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_R_32F, HIPBLAS_C_32F}, {HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_R_64F, HIPBLAS_C_64F}}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ class rot_ex_gtest : public ::TestWithParam { protected: rot_ex_gtest() {} virtual ~rot_ex_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; Arguments setup_rot_ex_arguments(rot_ex_tuple tup) { Arguments arg; arg.N = std::get<0>(tup); vector incx_incy = std::get<1>(tup); arg.stride_scale = std::get<2>(tup); arg.batch_count = std::get<3>(tup); vector precision_types = std::get<4>(tup); arg.fortran = std::get<5>(tup); arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.a_type = precision_types[0]; arg.b_type = precision_types[1]; arg.c_type = precision_types[2]; arg.compute_type = precision_types[3]; arg.timing = 0; // disable timing data print out. Not supposed to collect performance data in gtest return arg; } // rot TEST_P(rot_ex_gtest, rot_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_rot_ex_arguments(GetParam()); hipblasStatus_t status = testing_rot_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(rot_ex_gtest, rot_batched_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_rot_ex_arguments(GetParam()); hipblasStatus_t status = testing_rot_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(rot_ex_gtest, rot_strided_batched_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_rot_ex_arguments(GetParam()); hipblasStatus_t status = testing_rot_strided_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // Values is for a single item; ValuesIn is for an array // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p INSTANTIATE_TEST_SUITE_P(hipblasRotEx, rot_ex_gtest, Combine(ValuesIn(N_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(precisions), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/sbmv_gtest.cpp000066400000000000000000000216101434647641600213710ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_sbmv.hpp" #include "testing_sbmv_batched.hpp" #include "testing_sbmv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, double, int, bool> sbmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, K, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, {11, 10, 11}, {16, 4, 16}, {32, 16, 32}, {65, 64, 65} // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each element is an {incx, incy} const vector> incx_incy_range = { {-1, -1}, {0, 0}, {1, 2} // {10, 100} }; // {alpha, alphai, beta, betai} const vector> alpha_beta_range = {{-0.5, 1.5, 1.5, -1.0}, {2.0, -1.0, -1.0, 2.0}, {0.0, 0.0, 0.0, 0.0}}; // add/delete single values, like {2.0} const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 sbmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_sbmv_arguments(sbmv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_sbmv_gtest : public ::TestWithParam { protected: blas2_sbmv_gtest() {} virtual ~blas2_sbmv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_sbmv_gtest, sbmv_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_sbmv_arguments(GetParam()); hipblasStatus_t status = testing_sbmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_sbmv_gtest, sbmv_double) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_sbmv_arguments(GetParam()); hipblasStatus_t status = testing_sbmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_sbmv_gtest, sbmv_batched_float) { Arguments arg = setup_sbmv_arguments(GetParam()); hipblasStatus_t status = testing_sbmv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_sbmv_gtest, sbmv_strided_batched_float) { Arguments arg = setup_sbmv_arguments(GetParam()); hipblasStatus_t status = testing_sbmv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} } INSTANTIATE_TEST_SUITE_P(hipblassbmv, blas2_sbmv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/scal_ex_gtest.cpp000066400000000000000000000220711434647641600220420ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_scal_batched_ex.hpp" #include "testing_scal_ex.hpp" #include "testing_scal_strided_batched_ex.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, int, double, int, vector, bool> scal_ex_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* When you see this error, do not hack this source code, hack the Makefile. It is due to compilation. from 'testing::internal::CartesianProductHolder3, testing::internal::ParamGenerator >, testing::internal::ParamGenerator > >' to 'testing::internal::ParamGenerator >, std::vector > > >' */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ const int N_range[] = {-1, 10, 500, 1000, 7111, 10000}; const vector> alpha_range = {{1.0, 2.0}}; const int incx_range[] = {1, -1}; const double stride_scale_range[] = {1.0, 2.5}; const int batch_count_range[] = {-1, 0, 1, 2, 10}; // Supported rocBLAS configs const vector> precisions{// Not supported in cuBLAS #ifndef __HIP_PLATFORM_NVCC__ {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_16F}, {HIPBLAS_R_16F, HIPBLAS_R_16F, HIPBLAS_R_32F}, {HIPBLAS_R_32F, HIPBLAS_C_32F, HIPBLAS_C_32F}, {HIPBLAS_R_64F, HIPBLAS_C_64F, HIPBLAS_C_64F}, #endif // Supported in both rocBLAS and cuBLAS {HIPBLAS_R_32F, HIPBLAS_R_16F, HIPBLAS_R_32F}, {HIPBLAS_R_32F, HIPBLAS_R_32F, HIPBLAS_R_32F}, {HIPBLAS_R_64F, HIPBLAS_R_64F, HIPBLAS_R_64F}, {HIPBLAS_C_32F, HIPBLAS_C_32F, HIPBLAS_C_32F}, {HIPBLAS_C_64F, HIPBLAS_C_64F, HIPBLAS_C_64F} }; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ class scal_ex_gtest : public ::TestWithParam { protected: scal_ex_gtest() {} virtual ~scal_ex_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; Arguments setup_scal_ex_arguments(scal_ex_tuple tup) { Arguments arg; arg.N = std::get<0>(tup); arg.alpha = std::get<1>(tup)[0]; arg.alphai = std::get<1>(tup)[1]; arg.incx = std::get<2>(tup); arg.stride_scale = std::get<3>(tup); arg.batch_count = std::get<4>(tup); vector precision_types = std::get<5>(tup); arg.fortran = std::get<6>(tup); arg.a_type = precision_types[0]; arg.b_type = precision_types[1]; arg.compute_type = precision_types[2]; arg.timing = 0; // disable timing data print out. Not supposed to collect performance data in gtest return arg; } // scal tests TEST_P(scal_ex_gtest, scal_ex) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_scal_ex_arguments(GetParam()); hipblasStatus_t status = testing_scal_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.a_type == HIPBLAS_R_16F || (arg.a_type == HIPBLAS_R_32F && arg.b_type == HIPBLAS_C_32F) || (arg.a_type == HIPBLAS_R_64F && arg.b_type == HIPBLAS_C_64F)) { EXPECT_EQ(HIPBLAS_STATUS_NOT_SUPPORTED, status); // unsupported CUDA configs } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(scal_ex_gtest, scal_batched_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_scal_ex_arguments(GetParam()); hipblasStatus_t status = testing_scal_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx < 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(scal_ex_gtest, scal_strided_batched_ex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_scal_ex_arguments(GetParam()); hipblasStatus_t status = testing_scal_strided_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx < 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // Values is for a single item; ValuesIn is for an array // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p INSTANTIATE_TEST_SUITE_P(hipblasScalEx, scal_ex_gtest, Combine(ValuesIn(N_range), ValuesIn(alpha_range), ValuesIn(incx_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(precisions), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/set_get_atomics_mode_gtest.cpp000066400000000000000000000055441434647641600246070ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_set_get_atomics_mode.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple set_get_atomics_mode_tuple; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS set_get_atomics_mode: =================================================================== */ /* ============================Setup Arguments======================================= */ Arguments setup_set_get_atomics_mode_arguments(set_get_atomics_mode_tuple tup) { Arguments arg; arg.fortran = std::get<0>(tup); return arg; } class set_get_atomics_mode_gtest : public ::TestWithParam { protected: set_get_atomics_mode_gtest() {} virtual ~set_get_atomics_mode_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(set_get_atomics_mode_gtest, default) { Arguments arg = setup_set_get_atomics_mode_arguments(GetParam()); hipblasStatus_t status = testing_set_get_atomics_mode(arg); EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); } INSTANTIATE_TEST_SUITE_P(hipblas_auxiliary_small, set_get_atomics_mode_gtest, Combine(ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/set_get_matrix_gtest.cpp000066400000000000000000000167711434647641600234540ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_set_get_matrix.hpp" #include "testing_set_get_matrix_async.hpp" #include "utility.h" #include #include #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, bool> set_get_matrix_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // small sizes // vector of vector, each triple is a {rows, cols}; // add/delete this list in pairs, like {3, 4} const vector> rows_cols_range = {{3, 3}, {3, 30}}; // vector of vector, each triple is a {lda, ldb, ldc}; // add/delete this list in pairs, like {3, 4, 3} const vector> lda_ldb_ldc_range = {{3, 3, 3}, {3, 3, 4}, {3, 3, 5}, {3, 4, 3}, {3, 4, 4}, {3, 4, 5}, {3, 5, 3}, {3, 5, 4}, {3, 5, 5}, {5, 3, 3}, {5, 3, 4}, {5, 3, 5}, {5, 4, 3}, {5, 4, 4}, {5, 4, 5}, {5, 5, 3}, {5, 5, 4}, {5, 5, 5}}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS auxiliary: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
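/* =====================================================================
    For orientation only: a rough sketch (not used by these tests) of the
    round trip the set/get_matrix testers verify.  A host matrix ha with
    leading dimension lda is copied to a device matrix db (ldb) by
    hipblasSetMatrix, copied back into a second host matrix hc (ldc) by
    hipblasGetMatrix, and ha is then compared against hc.  Allocation and
    error handling are omitted; the sizes below are assumptions for
    illustration.

        int rows = 3, cols = 30;
        int lda = 3, ldb = 4, ldc = 5; // each leading dimension >= rows

        float* ha; // host buffer,   lda * cols elements
        float* hc; // host buffer,   ldc * cols elements
        float* db; // device buffer, ldb * cols elements

        hipblasSetMatrix(rows, cols, sizeof(float), ha, lda, db, ldb);
        hipblasGetMatrix(rows, cols, sizeof(float), db, ldb, hc, ldc);
   =================================================================== */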
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_set_get_matrix_arguments(set_get_matrix_tuple tup) { vector rows_cols = std::get<0>(tup); vector lda_ldb_ldc = std::get<1>(tup); Arguments arg; arg.rows = rows_cols[0]; arg.cols = rows_cols[1]; arg.lda = lda_ldb_ldc[0]; arg.ldb = lda_ldb_ldc[1]; arg.ldc = lda_ldb_ldc[2]; return arg; } class set_matrix_get_matrix_gtest : public ::TestWithParam { protected: set_matrix_get_matrix_gtest() {} virtual ~set_matrix_get_matrix_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(set_matrix_get_matrix_gtest, float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_set_get_matrix_arguments(GetParam()); hipblasStatus_t status = testing_set_get_matrix(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.rows < 0 || arg.cols <= 0 || arg.lda <= 0 || arg.ldb <= 0 || arg.ldc <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(set_matrix_get_matrix_gtest, async_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_set_get_matrix_arguments(GetParam()); hipblasStatus_t status = testing_set_get_matrix_async(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.rows < 0 || arg.cols <= 0 || arg.lda <= 0 || arg.ldb <= 0 || arg.ldc <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasAuxiliary_small, set_matrix_get_matrix_gtest, Combine(ValuesIn(rows_cols_range), ValuesIn(lda_ldb_ldc_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/set_get_pointer_mode_gtest.cpp000066400000000000000000000053251434647641600246250ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "hipblas.h" #ifdef GOOGLE_TEST #include #endif #include //#include "utility.h" using std::vector; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== BLAS set-get_pointer_mode: =================================================================== */ TEST(hipblas_set_pointer, hipblas_get_pointer) { hipblasPointerMode_t mode = HIPBLAS_POINTER_MODE_DEVICE; hipblasHandle_t handle; hipblasCreate(&handle); hipblasStatus_t status = hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE); EXPECT_EQ(status, HIPBLAS_STATUS_SUCCESS); status = hipblasGetPointerMode(handle, &mode); EXPECT_EQ(status, HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(HIPBLAS_POINTER_MODE_DEVICE, mode); status = hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST); EXPECT_EQ(status, HIPBLAS_STATUS_SUCCESS); status = hipblasGetPointerMode(handle, &mode); EXPECT_EQ(status, HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(HIPBLAS_POINTER_MODE_HOST, mode); hipblasDestroy(handle); } hipBLAS-rocm-5.5.1/clients/gtest/set_get_vector_gtest.cpp000066400000000000000000000162201434647641600234370ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_set_get_vector.hpp" #include "testing_set_get_vector_async.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, bool> set_get_vector_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M}; // add/delete as a group const int M_range[] = {600}; // vector of vector, each triple is a {incx, incy, incd}; // add/delete this list in pairs, like {1, 1, 1} const vector> incx_incy_incd_range = {{1, 1, 1}, {1, 1, 3}, {1, 2, 1}, {1, 2, 2}, {1, 3, 1}, {1, 3, 3}, {3, 1, 1}, {3, 1, 3}, {3, 2, 1}, {3, 2, 2}, {3, 3, 1}, {3, 3, 3}}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS set_get_vector: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_set_get_vector_arguments(set_get_vector_tuple tup) { int M = std::get<0>(tup); vector incx_incy_incd = std::get<1>(tup); Arguments arg; // see the comments about vector_size_range above arg.M = M; // see the comments about matrix_size_range above arg.incx = incx_incy_incd[0]; arg.incy = incx_incy_incd[1]; arg.incd = incx_incy_incd[2]; return arg; } class set_vector_get_vector_gtest : public ::TestWithParam { protected: set_vector_get_vector_gtest() {} virtual ~set_vector_get_vector_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // TEST_P(set_vector_get_vector_gtest, set_get_vector_float) TEST_P(set_vector_get_vector_gtest, float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
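// The status checks below mirror the tester's argument validation: M < 0 or a non-positive
// increment should yield HIPBLAS_STATUS_INVALID_VALUE; any other non-success status is treated
// as a real failure.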
Arguments arg = setup_set_get_vector_arguments(GetParam()); hipblasStatus_t status = testing_set_get_vector(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx <= 0 || arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(set_vector_get_vector_gtest, async_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_set_get_vector_arguments(GetParam()); hipblasStatus_t status = testing_set_get_vector_async(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx <= 0 || arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblas_auxiliary_small, set_vector_get_vector_gtest, Combine(ValuesIn(M_range), ValuesIn(incx_incy_incd_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/spmv_gtest.cpp000066400000000000000000000211461434647641600214130ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_spmv.hpp" #include "testing_spmv_batched.hpp" #include "testing_spmv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, int, bool> spmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M}; // add/delete as a group const vector matrix_size_range = { -1, 11, 16, 32, 65 // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each element is an {incx, incy} const vector> incx_incy_range = { {-1, -1}, {0, 0}, {1, 2} // {10, 100} }; // {alpha, alphai, beta, betai} const vector> alpha_beta_range = {{-0.5, 1.5, 1.5, -1.0}, {2.0, -1.0, -1.0, 2.0}, {0.0, 0.0, 0.0, 0.0}}; // add/delete single values, like {2.0} const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 spmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
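// spmv is a case in point: A is a packed symmetric matrix (N*(N+1)/2 elements, no lda), so only
// the order, incx/incy, alpha and beta are exercised here. For column-major, upper packed storage
// the element A(i,j) with i <= j sits at index i + j*(j+1)/2 of the packed array.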
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_spmv_arguments(spmv_tuple tup) { int matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_spmv_gtest : public ::TestWithParam { protected: blas2_spmv_gtest() {} virtual ~blas2_spmv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_spmv_gtest, spmv_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spmv_arguments(GetParam()); hipblasStatus_t status = testing_spmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_spmv_gtest, spmv_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
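// (Aside: these spmv cases can be run in isolation with the usual gtest filter, for example
//     ./hipblas-test --gtest_filter=*spmv*
// where the exact binary name/path depends on how the clients were built.)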
Arguments arg = setup_spmv_arguments(GetParam()); hipblasStatus_t status = testing_spmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_spmv_gtest, spmv_batched_float) { Arguments arg = setup_spmv_arguments(GetParam()); hipblasStatus_t status = testing_spmv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_spmv_gtest, spmv_strided_batched_float) { Arguments arg = setup_spmv_arguments(GetParam()); hipblasStatus_t status = testing_spmv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M}, {incx,incy} } INSTANTIATE_TEST_SUITE_P(hipblasspmv, blas2_spmv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/spr2_gtest.cpp000066400000000000000000000201741434647641600213140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_spr2.hpp" #include "testing_spr2_batched.hpp" #include "testing_spr2_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, double, double, int, bool> spr2_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda}; // add/delete as a group const vector matrix_size_range = {-1, 11, 16, 32, 65}; // vector of vector, each element is an {incx, incy} const vector> incx_incy_range = { {-1, -1}, {0, 0}, {1, 2} // {10, 100} }; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector alpha_range = {-0.5, 2.0, 0.0}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 spr2: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
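// For context: spr2 performs the packed symmetric rank-2 update A := alpha*(x*y**T + y*x**T) + A,
// with A stored in packed form, so there is no lda to validate; the interesting failure modes are
// N < 0 and zero increments, which the tests below expect to be rejected with
// HIPBLAS_STATUS_INVALID_VALUE.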
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_spr2_arguments(spr2_tuple tup) { int matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); double alpha = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[0]; arg.alpha = alpha; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_spr2_gtest : public ::TestWithParam { protected: blas2_spr2_gtest() {} virtual ~blas2_spr2_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // spr2 TEST_P(blas2_spr2_gtest, spr2_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spr2_arguments(GetParam()); hipblasStatus_t status = testing_spr2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // spr2_batched TEST_P(blas2_spr2_gtest, spr2_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spr2_arguments(GetParam()); hipblasStatus_t status = testing_spr2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // spr2_strided_batched TEST_P(blas2_spr2_gtest, spr2_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
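// A note on stride_scale for the strided_batched flavour below: the tester presumably derives the
// per-batch strides from it (something on the order of stride_scale * N*(N+1)/2 for the packed A
// and stride_scale * N * |incx| for the vectors -- the exact formulas live in
// testing_spr2_strided_batched.hpp), so values > 1 exercise batches with padding between them.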
Arguments arg = setup_spr2_arguments(GetParam()); hipblasStatus_t status = testing_spr2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasSpr2, blas2_spr2_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/spr_gtest.cpp000066400000000000000000000241651434647641600212360ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_spr.hpp" #include "testing_spr_batched.hpp" #include "testing_spr_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple spr_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N}; // add/delete as a group const vector matrix_size_range = {-1, 11, 16, 32, 65}; // vector of vector, each element is an {incx} const vector incx_range = {-2, 1, 0, 2}; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector alpha_range = {-0.5, 2.0, 0.0}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 spr: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_spr_arguments(spr_tuple tup) { Arguments arg; arg.N = std::get<0>(tup); arg.incx = std::get<1>(tup); double alpha = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); arg.fortran = std::get<5>(tup); arg.alpha = alpha; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; return arg; } class blas2_spr_gtest : public ::TestWithParam { protected: blas2_spr_gtest() {} virtual ~blas2_spr_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // spr TEST_P(blas2_spr_gtest, spr_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spr_arguments(GetParam()); hipblasStatus_t status = testing_spr(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_spr_gtest, spr_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spr_arguments(GetParam()); hipblasStatus_t status = testing_spr(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // spr_batched TEST_P(blas2_spr_gtest, spr_batched_gtest_float) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spr_arguments(GetParam()); hipblasStatus_t status = testing_spr_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_spr_gtest, spr_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spr_arguments(GetParam()); hipblasStatus_t status = testing_spr_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // spr_strided_batched TEST_P(blas2_spr_gtest, spr_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spr_arguments(GetParam()); hipblasStatus_t status = testing_spr_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_spr_gtest, spr_strided_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_spr_arguments(GetParam()); hipblasStatus_t status = testing_spr_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {N}, {incx} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasSpr, blas2_spr_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_range), ValuesIn(alpha_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/symm_gtest.cpp000066400000000000000000000266021434647641600214150ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_symm.hpp" #include "testing_symm_batched.hpp" #include "testing_symm_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, double, int, bool> symm_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda, ldb, ldc}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, 1, 1, 1}, {600, 500, 600, 600, 700}, }; const vector> alpha_beta_range = {{-5.0, 2.0, 3.0, -2.0}}; // vector of vector, each pair is a {side, uplo}; // side has two option "Lefe (L), Right (R)" // uplo has two "Lower (L), Upper (U)" // Each letter is capitalizied, e.g. do not use 'l', but use 'L' instead. 
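// Inside the testers these characters are mapped onto the hipBLAS enums (via the clients'
// char-to-enum helpers, e.g. something like char2hipblas_side / char2hipblas_fill):
//     side: 'L' -> HIPBLAS_SIDE_LEFT,       'R' -> HIPBLAS_SIDE_RIGHT
//     uplo: 'L' -> HIPBLAS_FILL_MODE_LOWER, 'U' -> HIPBLAS_FILL_MODE_UPPER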
const vector> side_uplo_range = { {'L', 'L'}, {'R', 'L'}, {'L', 'U'}, {'R', 'U'}, }; const vector stride_scale_range = {2}; const vector batch_count_range = {2}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 symm: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_symm_arguments(symm_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); vector side_uplo = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.ldc = matrix_size[4]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.side = side_uplo[0]; arg.uplo = side_uplo[1]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class symm_gtest : public ::TestWithParam { protected: symm_gtest() {} virtual ~symm_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(symm_gtest, symm_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_symm_arguments(GetParam()); hipblasStatus_t status = testing_symm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(symm_gtest, symm_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_symm_arguments(GetParam()); hipblasStatus_t status = testing_symm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(symm_gtest, symm_batched_gtest_float) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_symm_arguments(GetParam()); hipblasStatus_t status = testing_symm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(symm_gtest, symm_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_symm_arguments(GetParam()); hipblasStatus_t status = testing_symm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(symm_gtest, symm_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_symm_arguments(GetParam()); hipblasStatus_t status = testing_symm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(symm_gtest, symm_strided_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_symm_arguments(GetParam()); hipblasStatus_t status = testing_symm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldc < arg.M || arg.ldb < arg.M || (arg.side == 'L' ? 
arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda, ldb}, alpha, {side, diag} } INSTANTIATE_TEST_SUITE_P(hipblassymm_matrix_size, symm_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(side_uplo_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/symv_gtest.cpp000066400000000000000000000214201434647641600214170ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_symv.hpp" #include "testing_symv_batched.hpp" #include "testing_symv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, double, int, bool> symv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1}, {11, 11}, {16, 16}, {32, 32}, {65, 65} // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each element is an {incx, incy} const vector> incx_incy_range = { {-1, -1}, {0, 0}, {1, 2} // {10, 100} }; // {alpha, alphai, beta, betai} const vector> alpha_beta_range = {{-0.5, 1.5, 1.5, -1.0}, {2.0, -1.0, -1.0, 2.0}, {0.0, 0.0, 0.0, 0.0}}; // add/delete single values, like {2.0} const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 symv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_symv_arguments(symv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); vector alpha_beta = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[1]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_symv_gtest : public ::TestWithParam { protected: blas2_symv_gtest() {} virtual ~blas2_symv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_symv_gtest, symv_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_symv_arguments(GetParam()); hipblasStatus_t status = testing_symv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.lda < arg.M || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_symv_gtest, symv_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. 
// while the tuple is non-intuitive. Arguments arg = setup_symv_arguments(GetParam()); hipblasStatus_t status = testing_symv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.lda < arg.M || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_symv_gtest, symv_batched_float) { Arguments arg = setup_symv_arguments(GetParam()); hipblasStatus_t status = testing_symv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_symv_gtest, symv_strided_batched_float) { Arguments arg = setup_symv_arguments(GetParam()); hipblasStatus_t status = testing_symv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} } INSTANTIATE_TEST_SUITE_P(hipblassymv, blas2_symv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_beta_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/syr2_gtest.cpp000066400000000000000000000254301434647641600213250ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_syr2.hpp" #include "testing_syr2_batched.hpp" #include "testing_syr2_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, double, int, bool> syr2_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda}; // add/delete as a group const vector> matrix_size_range = {{-1, -1}, {11, 11}, {16, 16}, {32, 32}, {65, 65}}; // vector of vector, each element is an {incx, incy} const vector> incx_incy_range = { {-1, -1}, {0, 0}, {1, 2} // {10, 100} }; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector alpha_range = {-0.5, 2.0, 0.0}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 syr2: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
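// With the ranges above, each TEST_P in this file is instantiated for
// 5 (sizes) x 3 (inc pairs) x 3 (alphas) x 2 (stride scales) x 5 (batch counts) x 2 (Fortran flags)
// = 900 parameter combinations; extending any single range multiplies the total accordingly.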
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_syr2_arguments(syr2_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx_incy = std::get<1>(tup); double alpha = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx_incy[0]; arg.incy = incx_incy[0]; arg.alpha = alpha; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_syr2_gtest : public ::TestWithParam { protected: blas2_syr2_gtest() {} virtual ~blas2_syr2_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // syr2 TEST_P(blas2_syr2_gtest, syr2_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2_arguments(GetParam()); hipblasStatus_t status = testing_syr2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_syr2_gtest, syr2_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2_arguments(GetParam()); hipblasStatus_t status = testing_syr2(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // syr2_batched TEST_P(blas2_syr2_gtest, syr2_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2_arguments(GetParam()); hipblasStatus_t status = testing_syr2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_syr2_gtest, syr2_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_syr2_arguments(GetParam()); hipblasStatus_t status = testing_syr2_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // syr2_strided_batched TEST_P(blas2_syr2_gtest, syr2_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2_arguments(GetParam()); hipblasStatus_t status = testing_syr2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_syr2_gtest, syr2_strided_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2_arguments(GetParam()); hipblasStatus_t status = testing_syr2_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.incx == 0 || arg.incy == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasSyr2, blas2_syr2_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/syr2k_gtest.cpp000066400000000000000000000302351434647641600214770ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_syr2k.hpp" #include "testing_syr2k_batched.hpp" #include "testing_syr2k_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, char, char, double, int, bool> syr2k_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, K, lda, ldb, ldc}; // add/delete as a group const vector> matrix_size_range = {{-1, -1, -1, -1, -1}, {11, 6, 11, 11, 11}, {16, 15, 16, 16, 16}, {32, 12, 32, 32, 32}, {65, 4, 65, 65, 65}}; // vector, each entry is {alpha, alphai, beta, betai}; // add/delete single values, like {2.0} const vector> alpha_beta_range = {{-0.5, 1.5, 2.0, 1.5}, {2.0, 1.0, 2.0, 1.0}, {0.0, 0.0, 0.0, 0.0}}; const vector uplo_range = { 'L', 'U', }; const vector transA_range = {'N', 'T'}; // 'C' not supported yet. const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 syr2k: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_syr2k_arguments(syr2k_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); char uplo = std::get<2>(tup); char transA = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.ldc = matrix_size[4]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.uplo = uplo; arg.transA = transA; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas3_syr2k_gtest : public ::TestWithParam { protected: blas3_syr2k_gtest() {} virtual ~blas3_syr2k_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // syr2k TEST_P(blas3_syr2k_gtest, syr2k_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2k_arguments(GetParam()); hipblasStatus_t status = testing_syr2k(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K))) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syr2k_gtest, syr2k_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2k_arguments(GetParam()); hipblasStatus_t status = testing_syr2k(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K))) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // syr2k_batched TEST_P(blas3_syr2k_gtest, syr2k_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
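// Dimension cheat-sheet for the checks below: syr2k forms C := alpha*(A*B**T + B*A**T) + beta*C
// (or the transposed variant), where C is N x N; with transA == 'N', A and B are N x K, so
// lda/ldb must be >= N, otherwise they are K x N and lda/ldb must be >= K.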
Arguments arg = setup_syr2k_arguments(GetParam()); hipblasStatus_t status = testing_syr2k_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syr2k_gtest, syr2k_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2k_arguments(GetParam()); hipblasStatus_t status = testing_syr2k_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // syr2k_strided_batched TEST_P(blas3_syr2k_gtest, syr2k_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr2k_arguments(GetParam()); hipblasStatus_t status = testing_syr2k_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syr2k_gtest, syr2k_strided_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_syr2k_arguments(GetParam()); hipblasStatus_t status = testing_syr2k_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasSyr2k, blas3_syr2k_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(uplo_range), ValuesIn(transA_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/syr_gtest.cpp000066400000000000000000000260321434647641600212420ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_syr.hpp" #include "testing_syr_batched.hpp" #include "testing_syr_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, double, int, bool> syr_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". 
case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, {11, 11, 11}, {16, 16, 16}, {32, 32, 32}, {65, 65, 65} // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each element is an {incx} const vector> incx_incy_range = { {-2}, {1}, {0}, {2} // {10, 100} }; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector alpha_range = {-0.5, 2.0, 0.0}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 syr: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_syr_arguments(syr_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx = std::get<1>(tup); double alpha = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; // see the comments about matrix_size_range above arg.incx = incx[0]; arg.alpha = alpha; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_syr_gtest : public ::TestWithParam { protected: blas2_syr_gtest() {} virtual ~blas2_syr_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // syr TEST_P(blas2_syr_gtest, syr_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr_arguments(GetParam()); hipblasStatus_t status = testing_syr(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_syr_gtest, syr_gtest_float_complex) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr_arguments(GetParam()); hipblasStatus_t status = testing_syr(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // syr_batched TEST_P(blas2_syr_gtest, syr_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr_arguments(GetParam()); hipblasStatus_t status = testing_syr_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_syr_gtest, syr_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr_arguments(GetParam()); hipblasStatus_t status = testing_syr_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // syr_strided_batched TEST_P(blas2_syr_gtest, syr_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syr_arguments(GetParam()); hipblasStatus_t status = testing_syr_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_syr_gtest, syr_strided_batched_gtest_float_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_syr_arguments(GetParam()); hipblasStatus_t status = testing_syr_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx <= 0 || arg.incy <= 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasSyr, blas2_syr_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(alpha_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/syrk_gtest.cpp000066400000000000000000000273751434647641600214300ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_syrk.hpp" #include "testing_syrk_batched.hpp" #include "testing_syrk_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, char, char, double, int, bool> syrk_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, K, lda, ldc}; // add/delete as a group const vector> matrix_size_range = {{-1, -1, -1, -1}, {11, 6, 11, 11}, {16, 15, 16, 16}, {32, 12, 32, 32}, {65, 4, 65, 65}}; // vector, each entry is {alpha, alphai, beta, betai}; // add/delete single values, like {2.0} const vector> alpha_beta_range = {{-0.5, 1.5, 2.0, 1.5}, {2.0, 1.0, 2.0, 1.0}, {0.0, 0.0, 0.0, 0.0}}; const vector uplo_range = { 'L', 'U', }; const vector transA_range = {'N', 'T'}; //, 'C'}; // conjugate not supported yet. const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 syrk: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_syrk_arguments(syrk_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); char uplo = std::get<2>(tup); char transA = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldc = matrix_size[3]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.uplo = uplo; arg.transA = transA; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas3_syrk_gtest : public ::TestWithParam { protected: blas3_syrk_gtest() {} virtual ~blas3_syrk_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // syrk TEST_P(blas3_syrk_gtest, syrk_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrk_arguments(GetParam()); if(arg.transA == 'C') arg.transA = 'T'; hipblasStatus_t status = testing_syrk(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syrk_gtest, syrk_gtest_double_complex) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrk_arguments(GetParam()); hipblasStatus_t status = testing_syrk(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // syrk_batched TEST_P(blas3_syrk_gtest, syrk_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrk_arguments(GetParam()); if(arg.transA == 'C') arg.transA = 'T'; hipblasStatus_t status = testing_syrk_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syrk_gtest, syrk_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrk_arguments(GetParam()); hipblasStatus_t status = testing_syrk_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // syrk_strided_batched TEST_P(blas3_syrk_gtest, syrk_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrk_arguments(GetParam()); if(arg.transA == 'C') arg.transA = 'T'; hipblasStatus_t status = testing_syrk_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syrk_gtest, syrk_strided_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_syrk_arguments(GetParam()); hipblasStatus_t status = testing_syrk_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && arg.lda < arg.N) || (arg.transA != 'N' && arg.lda < arg.K) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasSyrk, blas3_syrk_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(uplo_range), ValuesIn(transA_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/syrkx_gtest.cpp000066400000000000000000000301171434647641600216040ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "syrkx_reference.hpp" #include "testing_syrkx.hpp" #include "testing_syrkx_batched.hpp" #include "testing_syrkx_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, char, char, double, int, bool> syrkx_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". 
case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, K, lda, ldb, ldc}; // add/delete as a group const vector> matrix_size_range = {{-1, -1, -1, -1, -1}, {11, 6, 11, 11, 11}, {16, 15, 16, 16, 16}, // {32, 12, 32, 32, 32}, {65, 4, 65, 65, 65}}; // vector, each entry is {alpha, alphai, beta, betai}; // add/delete single values, like {2.0} const vector> alpha_beta_range = {{-0.5, 1.5, 2.0, 1.5}, {2.0, 1.0, 2.0, 1.0}, {0.0, 0.0, 0.0, 0.0}}; const vector uplo_range = { 'L', 'U', }; const vector transA_range = {'N', 'T'}; // 'C' not supported yet. const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 syrkx: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_syrkx_arguments(syrkx_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_beta = std::get<1>(tup); char uplo = std::get<2>(tup); char transA = std::get<3>(tup); double stride_scale = std::get<4>(tup); int batch_count = std::get<5>(tup); bool fortran = std::get<6>(tup); Arguments arg; // see the comments about matrix_size_range above arg.N = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.ldc = matrix_size[4]; arg.alpha = alpha_beta[0]; arg.alphai = alpha_beta[1]; arg.beta = alpha_beta[2]; arg.betai = alpha_beta[3]; arg.timing = 0; arg.uplo = uplo; arg.transA = transA; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas3_syrkx_gtest : public ::TestWithParam { protected: blas3_syrkx_gtest() {} virtual ~blas3_syrkx_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; // syrkx TEST_P(blas3_syrkx_gtest, syrkx_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_syrkx_arguments(GetParam()); hipblasStatus_t status = testing_syrkx(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K))) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syrkx_gtest, syrkx_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrkx_arguments(GetParam()); hipblasStatus_t status = testing_syrkx(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K))) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ // syrkx_batched TEST_P(blas3_syrkx_gtest, syrkx_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrkx_arguments(GetParam()); hipblasStatus_t status = testing_syrkx_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syrkx_gtest, syrkx_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrkx_arguments(GetParam()); hipblasStatus_t status = testing_syrkx_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // syrkx_strided_batched TEST_P(blas3_syrkx_gtest, syrkx_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_syrkx_arguments(GetParam()); hipblasStatus_t status = testing_syrkx_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas3_syrkx_gtest, syrkx_strided_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_syrkx_arguments(GetParam()); hipblasStatus_t status = testing_syrkx_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.K < 0 || arg.ldc < arg.N || (arg.transA == 'N' && (arg.lda < arg.N || arg.ldb < arg.N)) || (arg.transA != 'N' && (arg.lda < arg.K || arg.ldb < arg.K)) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblasSyrkx, blas3_syrkx_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_beta_range), ValuesIn(uplo_range), ValuesIn(transA_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/tbmv_gtest.cpp000066400000000000000000000206271434647641600214010ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_tbmv.hpp" #include "testing_tbmv_batched.hpp" #include "testing_tbmv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, int, bool> tbmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, K, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, {11, 5, 11}, {16, 8, 16}, {32, 16, 32}, {65, 64, 65} // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each element is an {incx} const vector> incx_incy_range = { {-1}, {0}, {2} // {10, 100} }; // add/delete single values, like {2.0} const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 tbmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 TBMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
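// ---------------------------------------------------------------------------
// Editorial note (illustrative helper, not referenced by the tests):
// ::testing::Combine in INSTANTIATE_TEST_SUITE_P at the bottom of this file
// forms the Cartesian product of the ranges above, so every TEST_P below runs
// once per combination.  With the ranges as currently defined that is
// 5 matrix sizes * 3 incx values * 2 stride scales * 5 batch counts
// * 2 Fortran flags = 300 combinations per test.
inline size_t tbmv_gtest_combination_count()
{
    return matrix_size_range.size() * incx_incy_range.size() * stride_scale_range.size()
           * batch_count_range.size() * (sizeof(is_fortran) / sizeof(is_fortran[0]));
}
// ---------------------------------------------------------------------------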
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_tbmv_arguments(tbmv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx = std::get<1>(tup); double stride_scale = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; // see the comments about matrix_size_range above arg.incx = incx[0]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_tbmv_gtest : public ::TestWithParam { protected: blas2_tbmv_gtest() {} virtual ~blas2_tbmv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_tbmv_gtest, tbmv_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_tbmv_arguments(GetParam()); hipblasStatus_t status = testing_tbmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.M || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tbmv_gtest, tbmv_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_tbmv_arguments(GetParam()); hipblasStatus_t status = testing_tbmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.M || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_tbmv_gtest, tbmv_batched_float) { Arguments arg = setup_tbmv_arguments(GetParam()); hipblasStatus_t status = testing_tbmv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tbmv_gtest, tbmv_strided_batched_float) { Arguments arg = setup_tbmv_arguments(GetParam()); hipblasStatus_t status = testing_tbmv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} } INSTANTIATE_TEST_SUITE_P(hipblastbmv, blas2_tbmv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/tbsv_gtest.cpp000066400000000000000000000206471434647641600214110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_tbsv.hpp" #include "testing_tbsv_batched.hpp" #include "testing_tbsv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, int, bool> tbsv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, K, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, -1}, {11, 5, 11}, {16, 8, 16}, {32, 16, 32}, {65, 64, 65} // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each element is an {incx} const vector> incx_incy_range = { {-1}, {0}, {2} // {10, 100} }; // add/delete single values, like {2.0} const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 tbsv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 TBSV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
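// ---------------------------------------------------------------------------
// Editorial sketch (reference only, not used by the tests): tbsv operates on a
// triangular band matrix stored in the banded (TB) format, which is why the
// argument checks in the tests below require lda >= K + 1 rather than
// lda >= M.  For an upper-triangular band matrix with K super-diagonals, the
// conventional 0-based column-major mapping of A(i, j) is:
inline int tbsv_upper_banded_index(int i, int j, int K, int lda)
{
    // valid for max(0, j - K) <= i <= j, with lda >= K + 1
    return (K + i - j) + j * lda;
}
// ---------------------------------------------------------------------------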
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_tbsv_arguments(tbsv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx = std::get<1>(tup); double stride_scale = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.K = matrix_size[1]; arg.lda = matrix_size[2]; // see the comments about matrix_size_range above arg.incx = incx[0]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_tbsv_gtest : public ::TestWithParam { protected: blas2_tbsv_gtest() {} virtual ~blas2_tbsv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_tbsv_gtest, tbsv_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_tbsv_arguments(GetParam()); hipblasStatus_t status = testing_tbsv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tbsv_gtest, tbsv_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_tbsv_arguments(GetParam()); hipblasStatus_t status = testing_tbsv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_tbsv_gtest, tbsv_batched_float) { Arguments arg = setup_tbsv_arguments(GetParam()); hipblasStatus_t status = testing_tbsv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tbsv_gtest, tbsv_strided_batched_float) { Arguments arg = setup_tbsv_arguments(GetParam()); hipblasStatus_t status = testing_tbsv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.K < 0 || arg.lda < arg.K + 1 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} } INSTANTIATE_TEST_SUITE_P(hipblastbsv, blas2_tbsv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/tpmv_gtest.cpp000066400000000000000000000202061434647641600214100ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_tpmv.hpp" #include "testing_tpmv_batched.hpp" #include "testing_tpmv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, double, int, bool> tpmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M}; // add/delete as a group const vector matrix_size_range = { -1, 11, 16, 32, 65 // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each element is an {incx} const vector> incx_incy_range = { {-1}, {0}, {2} // {10, 100} }; // add/delete single values, like {2.0} const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 tpmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_tpmv_arguments(tpmv_tuple tup) { int matrix_size = std::get<0>(tup); vector incx = std::get<1>(tup); double stride_scale = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size; // see the comments about matrix_size_range above arg.incx = incx[0]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_tpmv_gtest : public ::TestWithParam { protected: blas2_tpmv_gtest() {} virtual ~blas2_tpmv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_tpmv_gtest, tpmv_float) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_tpmv_arguments(GetParam()); hipblasStatus_t status = testing_tpmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tpmv_gtest, tpmv_double) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_tpmv_arguments(GetParam()); hipblasStatus_t status = testing_tpmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_tpmv_gtest, tpmv_batched_float) { Arguments arg = setup_tpmv_arguments(GetParam()); hipblasStatus_t status = testing_tpmv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tpmv_gtest, tpmv_strided_batched_float) { Arguments arg = setup_tpmv_arguments(GetParam()); hipblasStatus_t status = testing_tpmv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M}, {incx,incy} } INSTANTIATE_TEST_SUITE_P(hipblastpmv, blas2_tpmv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/tpsv_gtest.cpp000066400000000000000000000220771434647641600214260ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_tpsv.hpp" #include "testing_tpsv_batched.hpp" #include "testing_tpsv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple tpsv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N}; // add/delete as a group const vector matrix_size_range = {-1, 11, 16, 32, 65}; // vector of vector, each element is an {incx} const vector incx_range = { -2, 1, 0, 2 // {10, 100} }; // vector, each entry is {alpha}; // add/delete single values, like {2.0} const vector alpha_range = {0.0}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 tpsv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
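// Illustrative sketch only, not part of the original hipBLAS test sources: ToyArgs,
// toy_tuple and setup_toy_args are hypothetical names used just for this example.
// It shows, in miniature, the pattern setup_tpsv_arguments() below follows: unpack the
// positional std::tuple exactly once into a struct with named members, so the testers
// never have to touch error-prone std::get<N> indices themselves.
#include <tuple>

namespace
{
    struct ToyArgs
    {
        int    N            = 0;
        int    incx         = 1;
        double stride_scale = 1.0;
        int    batch_count  = 1;
        bool   fortran      = false;
    };

    typedef std::tuple<int, int, double, int, bool> toy_tuple;

    inline ToyArgs setup_toy_args(const toy_tuple& tup)
    {
        ToyArgs arg;
        arg.N            = std::get<0>(tup); // matrix size
        arg.incx         = std::get<1>(tup); // x increment
        arg.stride_scale = std::get<2>(tup);
        arg.batch_count  = std::get<3>(tup);
        arg.fortran      = std::get<4>(tup); // use the Fortran bindings?
        return arg;
    }
} // namespace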
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_tpsv_arguments(tpsv_tuple tup) { Arguments arg; arg.N = std::get<0>(tup); arg.incx = std::get<1>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_tpsv_gtest : public ::TestWithParam { protected: blas2_tpsv_gtest() {} virtual ~blas2_tpsv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_tpsv_gtest, tpsv_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_tpsv_arguments(GetParam()); hipblasStatus_t status = testing_tpsv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tpsv_gtest, tpsv_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_tpsv_arguments(GetParam()); hipblasStatus_t status = testing_tpsv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_tpsv_gtest, tpsv_batched_float) { Arguments arg = setup_tpsv_arguments(GetParam()); hipblasStatus_t status = testing_tpsv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tpsv_gtest, tpsv_batched_double_complex) { Arguments arg = setup_tpsv_arguments(GetParam()); hipblasStatus_t status = testing_tpsv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tpsv_gtest, tpsv_strided_batched_float) { Arguments arg = setup_tpsv_arguments(GetParam()); hipblasStatus_t status = testing_tpsv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_tpsv_gtest, tpsv_strided_batched_double_complex) { Arguments arg = setup_tpsv_arguments(GetParam()); hipblasStatus_t status = 
testing_tpsv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M}, {incx} {alpha} } INSTANTIATE_TEST_SUITE_P(hipblastpsv, blas2_tpsv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_range), ValuesIn(alpha_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/trmm_gtest.cpp000066400000000000000000000325151434647641600214070ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_trmm.hpp" #include "testing_trmm_batched.hpp" #include "testing_trmm_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, double, vector, double, int, bool> trmm_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda, ldb}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, 1, 1}, {10, 10, 20, 100}, // {600, 500, 600, 600} , // {1024, 1024, 1024, 1024} }; const vector> full_matrix_size_range = { {192, 192, 192, 192}, // {640, 640, 960, 960}, // {1000, 1000, 1000, 1000}, // {2000, 2000, 2000, 2000}, }; const vector alpha_range = {1.0, -5.0}; // vector of vector, each pair is a {side, uplo, transA, diag}; // side has two option "Lefe (L), Right (R)" // uplo has two "Lower (L), Upper (U)" // transA has three ("Nontranspose (N), conjTranspose(C), transpose (T)") // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) automatically // in strmm/dtrmm, // so we use 'C' // Diag has two options ("Non-unit (N), Unit (U)") // Each letter is capitalizied, e.g. do not use 'l', but use 'L' instead. const vector> side_uplo_transA_diag_range = { {'L', 'L', 'N', 'N'}, {'R', 'L', 'N', 'N'}, {'L', 'U', 'C', 'N'}, }; // has all the 16 options const vector> full_side_uplo_transA_diag_range = { {'L', 'L', 'N', 'N'}, {'R', 'L', 'N', 'N'}, {'L', 'U', 'N', 'N'}, {'R', 'U', 'N', 'N'}, {'L', 'L', 'C', 'N'}, {'R', 'L', 'C', 'N'}, {'L', 'U', 'C', 'N'}, {'R', 'U', 'C', 'N'}, {'L', 'L', 'N', 'U'}, {'R', 'L', 'N', 'U'}, {'L', 'U', 'N', 'U'}, {'R', 'U', 'N', 'U'}, {'L', 'L', 'C', 'U'}, {'R', 'L', 'C', 'U'}, {'L', 'U', 'C', 'U'}, {'R', 'U', 'C', 'U'}, }; const vector stride_scale_range = {1, 3}; const vector batch_count_range = {1, 3}; const bool is_fortran[] = {false, true}; const bool is_fortran_false[] = {false}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 trmm: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_trmm_arguments(trmm_tuple tup) { vector matrix_size = std::get<0>(tup); double alpha = std::get<1>(tup); vector side_uplo_transA_diag = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.alpha = alpha; arg.side = side_uplo_transA_diag[0]; arg.uplo = side_uplo_transA_diag[1]; arg.transA = side_uplo_transA_diag[2]; arg.diag = side_uplo_transA_diag[3]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class trmm_gtest : public ::TestWithParam { protected: trmm_gtest() {} virtual ~trmm_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(trmm_gtest, trmm_gtest_float) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trmm_arguments(GetParam()); hipblasStatus_t status = testing_trmm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trmm_gtest, trmm_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trmm_arguments(GetParam()); hipblasStatus_t status = testing_trmm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(trmm_gtest, trmm_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trmm_arguments(GetParam()); hipblasStatus_t status = testing_trmm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trmm_gtest, trmm_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trmm_arguments(GetParam()); hipblasStatus_t status = testing_trmm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trmm_gtest, trmm_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trmm_arguments(GetParam()); hipblasStatus_t status = testing_trmm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? 
arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trmm_gtest, trmm_strided_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trmm_arguments(GetParam()); hipblasStatus_t status = testing_trmm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda, ldb}, alpha, {side, uplo, transA, diag} } // THis function mainly test the scope of matrix_size. the scope of side_uplo_transA_diag_range is // small // Testing order: side_uplo_transA_xx first, alpha_range second, full_matrix_size last // i.e fix the matrix size and alpha, test all the side_uplo_transA_xx first. INSTANTIATE_TEST_SUITE_P(hipblastrmm_matrix_size, trmm_gtest, Combine(ValuesIn(full_matrix_size_range), ValuesIn(alpha_range), ValuesIn(side_uplo_transA_diag_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); // THis function mainly test the scope of full_side_uplo_transA_diag_range,.the scope of // matrix_size_range is small INSTANTIATE_TEST_SUITE_P(hipblastrmm_scalar_transpose, trmm_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_range), ValuesIn(full_side_uplo_transA_diag_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran_false))); hipBLAS-rocm-5.5.1/clients/gtest/trmv_gtest.cpp000066400000000000000000000204601434647641600214140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_trmv.hpp" #include "testing_trmv_batched.hpp" #include "testing_trmv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, double, int, bool> trmv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, lda}; // add/delete as a group const vector> matrix_size_range = { {-1, -1}, {11, 11}, {16, 16}, {32, 32}, {65, 65} // {10, 10, 2}, // {600,500, 500}, // {1000, 1000, 1000}, // {2000, 2000, 2000}, // {4011, 4011, 4011}, // {8000, 8000, 8000} }; // vector of vector, each element is an {incx} const vector> incx_incy_range = { {-1}, {0}, {2} // {10, 100} }; // add/delete single values, like {2.0} const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 trmv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
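// Illustrative helper, not part of the original hipBLAS test sources (the name
// count_trmv_combinations is hypothetical). Combine() in the INSTANTIATE_TEST_SUITE_P
// call at the end of this file generates the Cartesian product of the ranges declared
// above, so each TEST_P below is instantiated once per combination; with the ranges as
// currently listed that is 5 * 3 * 2 * 5 * 2 = 300 cases per test.
#include <cstddef>

namespace
{
    inline std::size_t count_trmv_combinations()
    {
        const std::size_t n_sizes   = matrix_size_range.size();
        const std::size_t n_incx    = incx_incy_range.size();
        const std::size_t n_strides = stride_scale_range.size();
        const std::size_t n_batches = batch_count_range.size();
        const std::size_t n_fortran = sizeof(is_fortran) / sizeof(is_fortran[0]);
        return n_sizes * n_incx * n_strides * n_batches * n_fortran;
    }
} // namespace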
// Do not use std::tuple to directly pass parameters to testers // with std::tuple, you have to unpack it with extreme care, element by element, with calls like "std::get<0>", which is // not intuitive and error-prone Arguments setup_trmv_arguments(trmv_tuple tup) { vector matrix_size = std::get<0>(tup); vector incx = std::get<1>(tup); double stride_scale = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx[0]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_trmv_gtest : public ::TestWithParam { protected: blas2_trmv_gtest() {} virtual ~blas2_trmv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_trmv_gtest, trmv_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg (Arguments), which is then passed to the testing routine. // The Arguments data structure has physical meaning associated with its members, // while the tuple is non-intuitive. Arguments arg = setup_trmv_arguments(GetParam()); hipblasStatus_t status = testing_trmv(arg); // if not success, then the input arguments are problematic, so check for the expected error status if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.lda < arg.M || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_trmv_gtest, trmv_double) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg (Arguments), which is then passed to the testing routine. // The Arguments data structure has physical meaning associated with its members, // while the tuple is non-intuitive.
Arguments arg = setup_trmv_arguments(GetParam()); hipblasStatus_t status = testing_trmv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.lda < arg.M || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_trmv_gtest, trmv_batched_float) { Arguments arg = setup_trmv_arguments(GetParam()); hipblasStatus_t status = testing_trmv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_trmv_gtest, trmv_strided_batched_float) { Arguments arg = setup_trmv_arguments(GetParam()); hipblasStatus_t status = testing_trmv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} } INSTANTIATE_TEST_SUITE_P(hipblastrmv, blas2_trmv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/trsm_ex_gtest.cpp000066400000000000000000000311561434647641600221110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "testing_trsm_batched_ex.hpp" #include "testing_trsm_ex.hpp" #include "testing_trsm_strided_batched_ex.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, double, int, bool> trsm_ex_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda, ldb}; // add/delete as a group const vector> full_matrix_size_range = { {192, 192, 192, 192}, {640, 640, 960, 960}, // {1000, 1000, 1000, 1000}, // {2000, 2000, 2000, 2000}, }; const vector> alpha_range = {{1.0, -5.0}}; // vector of vector, each pair is a {side, uplo, transA, diag}; // side has two option "Lefe (L), Right (R)" // uplo has two "Lower (L), Upper (U)" // transA has three ("Nontranspose (N), conjTranspose(C), transpose (T)") // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) automatically // in strsm/dtrsm, // so we use 'C' // Diag has two options ("Non-unit (N), Unit (U)") // Each letter is capitalizied, e.g. do not use 'l', but use 'L' instead. const vector> side_uplo_transA_diag_range = { {'L', 'L', 'N', 'N'}, {'R', 'L', 'N', 'N'}, {'L', 'U', 'C', 'N'}, }; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 1, 2}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 trsm: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
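// Illustrative sketch, not part of the original hipBLAS test sources (the name
// expected_trsm_ex_status is hypothetical). Every TEST_P body below repeats the same
// idea: a non-success status is acceptable only when the generated arguments are
// intentionally invalid. This helper writes that mapping down once, mirroring the
// condition used by the batched and strided-batched tests below.
namespace
{
    inline hipblasStatus_t expected_trsm_ex_status(const Arguments& arg)
    {
        const bool bad_lda = (arg.side == 'L') ? (arg.lda < arg.M) : (arg.lda < arg.N);
        const bool invalid
            = arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || bad_lda || arg.batch_count < 0;
        return invalid ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS;
    }
} // namespace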
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_trsm_ex_arguments(trsm_ex_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_alphai = std::get<1>(tup); vector side_uplo_transA_diag = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.alpha = alpha_alphai[0]; arg.alphai = alpha_alphai[1]; arg.side = side_uplo_transA_diag[0]; arg.uplo = side_uplo_transA_diag[1]; arg.transA = side_uplo_transA_diag[2]; arg.diag = side_uplo_transA_diag[3]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class trsm_ex_gtest : public ::TestWithParam { protected: trsm_ex_gtest() {} virtual ~trsm_ex_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(trsm_ex_gtest, trsm_ex_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_ex_arguments(GetParam()); arg.compute_type = HIPBLAS_R_32F; hipblasStatus_t status = testing_trsm_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldb < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_ex_gtest, trsm_gtest_ex_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_ex_arguments(GetParam()); arg.compute_type = HIPBLAS_C_64F; hipblasStatus_t status = testing_trsm_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_ex_gtest, trsm_batched_ex_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_ex_arguments(GetParam()); arg.compute_type = HIPBLAS_R_32F; hipblasStatus_t status = testing_trsm_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.K || arg.ldb < arg.M || (arg.side == 'L' ? 
arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_ex_gtest, trsm_batched_ex_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_ex_arguments(GetParam()); arg.compute_type = HIPBLAS_C_64F; hipblasStatus_t status = testing_trsm_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.K || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_ex_gtest, trsm_strided_batched_ex_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_ex_arguments(GetParam()); arg.compute_type = HIPBLAS_R_32F; hipblasStatus_t status = testing_trsm_strided_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_ex_gtest, trsm_strided_batched_ex_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_ex_arguments(GetParam()); arg.compute_type = HIPBLAS_C_64F; hipblasStatus_t status = testing_trsm_strided_batched_ex(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda, ldb}, alpha, {side, uplo, transA, diag} } // THis function mainly test the scope of matrix_size. the scope of side_uplo_transA_diag_range is // small // Testing order: side_uplo_transA_xx first, alpha_range second, full_matrix_size last // i.e fix the matrix size and alpha, test all the side_uplo_transA_xx first. 
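// Illustrative sketch, not part of the original hipBLAS test sources (the name
// enumerate_trsm_ex_params is hypothetical). Conceptually, the Combine(ValuesIn(...))
// expression in the INSTANTIATE_TEST_SUITE_P call just below is equivalent to the
// nested loops here: it walks the Cartesian product of the declared ranges. Building
// the list by hand like this can be handy when reproducing a single failing
// combination outside of gtest.
namespace
{
    inline std::vector<trsm_ex_tuple> enumerate_trsm_ex_params()
    {
        std::vector<trsm_ex_tuple> params;
        for(const auto& size : full_matrix_size_range)
            for(const auto& alpha : alpha_range)
                for(const auto& suld : side_uplo_transA_diag_range)
                    for(double scale : stride_scale_range)
                        for(int batch : batch_count_range)
                            for(bool fortran : is_fortran)
                                params.push_back(
                                    std::make_tuple(size, alpha, suld, scale, batch, fortran));
        return params;
    }
} // namespace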
INSTANTIATE_TEST_SUITE_P(hipblasTrsm_matrix_size, trsm_ex_gtest, Combine(ValuesIn(full_matrix_size_range), ValuesIn(alpha_range), ValuesIn(side_uplo_transA_diag_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/gtest/trsm_gtest.cpp000066400000000000000000000331061434647641600214120ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_trsm.hpp" #include "testing_trsm_batched.hpp" #include "testing_trsm_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, vector, vector, double, int, bool> trsm_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, N, lda, ldb}; // add/delete as a group const vector> matrix_size_range = { {-1, -1, 1, 1}, {10, 10, 20, 100}, {600, 500, 600, 600}, // {1024, 1024, 1024, 1024} }; const vector> full_matrix_size_range = { {192, 192, 192, 192}, {640, 640, 960, 960}, // {1000, 1000, 1000, 1000}, // {2000, 2000, 2000, 2000}, }; const vector> alpha_range = {{1.0, -5.0}}; // vector of vector, each pair is a {side, uplo, transA, diag}; // side has two option "Lefe (L), Right (R)" // uplo has two "Lower (L), Upper (U)" // transA has three ("Nontranspose (N), conjTranspose(C), transpose (T)") // for single/double precision, 'C'(conjTranspose) will downgraded to 'T' (transpose) automatically // in strsm/dtrsm, // so we use 'C' // Diag has two options ("Non-unit (N), Unit (U)") // Each letter is capitalizied, e.g. do not use 'l', but use 'L' instead. const vector> side_uplo_transA_diag_range = { {'L', 'L', 'N', 'N'}, {'R', 'L', 'N', 'N'}, {'L', 'U', 'C', 'N'}, }; // has all the 16 options const vector> full_side_uplo_transA_diag_range = { {'L', 'L', 'N', 'N'}, {'R', 'L', 'N', 'N'}, {'L', 'U', 'N', 'N'}, {'R', 'U', 'N', 'N'}, {'L', 'L', 'C', 'N'}, {'R', 'L', 'C', 'N'}, {'L', 'U', 'C', 'N'}, {'R', 'U', 'C', 'N'}, {'L', 'L', 'N', 'U'}, {'R', 'L', 'N', 'U'}, {'L', 'U', 'N', 'U'}, {'R', 'U', 'N', 'U'}, {'L', 'L', 'C', 'U'}, {'R', 'L', 'C', 'U'}, {'L', 'U', 'C', 'U'}, {'R', 'U', 'C', 'U'}, }; const vector stride_scale_range = {2.5}; const vector batch_count_range = {-1, 0, 1, 2}; const bool is_fortran[] = {false, true}; const bool is_fortran_false[] = {false}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 trsm: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
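// Illustrative guard, not part of the original hipBLAS test sources (the name
// is_valid_side_uplo_transA_diag is hypothetical). The comments above require
// capitalized option letters for {side, uplo, transA, diag}; a check like this makes
// that convention explicit when new entries are added to the ranges.
namespace
{
    inline bool is_valid_side_uplo_transA_diag(const vector<char>& c)
    {
        return c.size() == 4
               && (c[0] == 'L' || c[0] == 'R') // side: Left / Right
               && (c[1] == 'L' || c[1] == 'U') // uplo: Lower / Upper
               && (c[2] == 'N' || c[2] == 'T' || c[2] == 'C') // transA
               && (c[3] == 'N' || c[3] == 'U'); // diag: Non-unit / Unit
    }
} // namespace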
// Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_trsm_arguments(trsm_tuple tup) { vector matrix_size = std::get<0>(tup); vector alpha_alphai = std::get<1>(tup); vector side_uplo_transA_diag = std::get<2>(tup); double stride_scale = std::get<3>(tup); int batch_count = std::get<4>(tup); bool fortran = std::get<5>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.N = matrix_size[1]; arg.lda = matrix_size[2]; arg.ldb = matrix_size[3]; arg.alpha = alpha_alphai[0]; arg.alphai = alpha_alphai[1]; arg.side = side_uplo_transA_diag[0]; arg.uplo = side_uplo_transA_diag[1]; arg.transA = side_uplo_transA_diag[2]; arg.diag = side_uplo_transA_diag[3]; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class trsm_gtest : public ::TestWithParam { protected: trsm_gtest() {} virtual ~trsm_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(trsm_gtest, trsm_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_arguments(GetParam()); hipblasStatus_t status = testing_trsm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else if(arg.ldb < arg.M) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_gtest, trsm_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_arguments(GetParam()); hipblasStatus_t status = testing_trsm(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N)) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_gtest, trsm_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_arguments(GetParam()); hipblasStatus_t status = testing_trsm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.K || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_gtest, trsm_batched_gtest_double_complex) { // GetParam return a tuple. 
Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_arguments(GetParam()); hipblasStatus_t status = testing_trsm_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.K || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(trsm_gtest, trsm_strided_batched_gtest_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_arguments(GetParam()); hipblasStatus_t status = testing_trsm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trsm_gtest, trsm_strided_batched_gtest_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsm_arguments(GetParam()); hipblasStatus_t status = testing_trsm_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.ldb < arg.M || (arg.side == 'L' ? arg.lda < arg.M : arg.lda < arg.N) || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda, ldb}, alpha, {side, uplo, transA, diag} } // THis function mainly test the scope of matrix_size. the scope of side_uplo_transA_diag_range is // small // Testing order: side_uplo_transA_xx first, alpha_range second, full_matrix_size last // i.e fix the matrix size and alpha, test all the side_uplo_transA_xx first. 
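// Illustrative sketch, not part of the original hipBLAS test sources (the name
// make_single_trsm_case is hypothetical, and the element types of trsm_tuple are
// assumed from how setup_trsm_arguments() above unpacks it; the values are simply
// picked from the ranges above). To reproduce one combination outside the INSTANTIATE
// machinery below, a trsm_tuple can be built by hand and run through the same setup
// path the TEST_P bodies use; the resulting Arguments can then be handed to the
// templated trsm tester.
namespace
{
    inline Arguments make_single_trsm_case()
    {
        // {M, N, lda, ldb}, {alpha, alphai}, {side, uplo, transA, diag},
        // stride_scale, batch_count, fortran
        trsm_tuple tup = std::make_tuple(vector<int>{192, 192, 192, 192},
                                         vector<double>{1.0, -5.0},
                                         vector<char>{'L', 'L', 'N', 'N'},
                                         2.5,
                                         1,
                                         false);
        return setup_trsm_arguments(tup);
    }
} // namespace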
INSTANTIATE_TEST_SUITE_P(hipblasTrsm_matrix_size, trsm_gtest, Combine(ValuesIn(full_matrix_size_range), ValuesIn(alpha_range), ValuesIn(side_uplo_transA_diag_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); // THis function mainly test the scope of full_side_uplo_transA_diag_range,.the scope of // matrix_size_range is small INSTANTIATE_TEST_SUITE_P(hipblasTrsm_scalar_transpose, trsm_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(alpha_range), ValuesIn(full_side_uplo_transA_diag_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran_false))); hipBLAS-rocm-5.5.1/clients/gtest/trsv_gtest.cpp000066400000000000000000000225111434647641600214210ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_trsv.hpp" #include "testing_trsv_batched.hpp" #include "testing_trsv_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; // only GCC/VS 2010 comes with std::tr1::tuple, but it is unnecessary, std::tuple is good enough; typedef std::tuple, int, double, int, bool> trsv_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. 
Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {M, lda}; // add/delete as a group const vector> matrix_size_range = {{-1, -1}, {11, 11}, {16, 16}, {32, 32}, {65, 65}}; // vector of vector, each element is an {incx} const vector incx_incy_range = {-2, 1, 0, 2}; const vector stride_scale_range = {1.0, 2.5}; const vector batch_count_range = {-1, 0, 1, 2, 10}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-2 trsv: =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. // Do not use std::tuple to directly pass parameters to testers // by std:tuple, you have unpack it with extreme care for each one by like "std::get<0>" which is // not intuitive and error-prone Arguments setup_trsv_arguments(trsv_tuple tup) { vector matrix_size = std::get<0>(tup); int incx = std::get<1>(tup); double stride_scale = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; // see the comments about matrix_size_range above arg.M = matrix_size[0]; arg.lda = matrix_size[1]; // see the comments about matrix_size_range above arg.incx = incx; arg.timing = 0; arg.stride_scale = stride_scale; arg.batch_count = batch_count; arg.fortran = fortran; return arg; } class blas2_trsv_gtest : public ::TestWithParam { protected: blas2_trsv_gtest() {} virtual ~blas2_trsv_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; TEST_P(blas2_trsv_gtest, trsv_float) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. Arguments arg = setup_trsv_arguments(GetParam()); hipblasStatus_t status = testing_trsv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_trsv_gtest, trsv_double_complex) { // GetParam return a tuple. Tee setup routine unpack the tuple // and initializes arg(Arguments) which will be passed to testing routine // The Arguments data struture have physical meaning associated. // while the tuple is non-intuitive. 
Arguments arg = setup_trsv_arguments(GetParam()); hipblasStatus_t status = testing_trsv(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx == 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #ifndef __HIP_PLATFORM_NVCC__ TEST_P(blas2_trsv_gtest, trsv_batched_float) { Arguments arg = setup_trsv_arguments(GetParam()); hipblasStatus_t status = testing_trsv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_trsv_gtest, trsv_batched_double_complex) { Arguments arg = setup_trsv_arguments(GetParam()); hipblasStatus_t status = testing_trsv_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_trsv_gtest, trsv_strided_batched_float) { Arguments arg = setup_trsv_arguments(GetParam()); hipblasStatus_t status = testing_trsv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(blas2_trsv_gtest, trsv_strided_batched_double_complex) { Arguments arg = setup_trsv_arguments(GetParam()); hipblasStatus_t status = testing_trsv_strided_batched(arg); // if not success, then the input argument is problematic, so detect the error message if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.M < 0 || arg.N < 0 || arg.lda < arg.M || arg.incx == 0 || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } #endif // notice we are using vector of vector // so each elment in xxx_range is a avector, // ValuesIn take each element (a vector) and combine them and feed them to test_p // The combinations are { {M, N, lda}, {incx,incy} } INSTANTIATE_TEST_SUITE_P(hipblastrsv, blas2_trsv_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(incx_incy_range), ValuesIn(stride_scale_range), ValuesIn(batch_count_range), ValuesIn(is_fortran))); hipBLAS-rocm-5.5.1/clients/gtest/trtri_gtest.cpp000066400000000000000000000170761434647641600216010ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_trtri.hpp" #include "testing_trtri_batched.hpp" #include "testing_trtri_strided_batched.hpp" #include "utility.h" #include #include #include using std::vector; using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; typedef std::tuple, char, char, int, bool> trtri_tuple; /* ===================================================================== README: This file contains testers to verify the correctness of BLAS routines with google test It is supposed to be played/used by advance / expert users Normal users only need to get the library routines without testers =================================================================== */ /* ===================================================================== Advance users only: BrainStorm the parameters but do not make artificial one which invalidates the matrix. like lda pairs with M, and "lda must >= M". case "lda < M" will be guarded by argument-checkers inside API of course. Yet, the goal of this file is to verify result correctness not argument-checkers. Representative sampling is sufficient, endless brute-force sampling is not necessary =================================================================== */ // vector of vector, each vector is a {N, lda}; N > 32 will return not implemented // add/delete as a group const vector> matrix_size_range = {{-1, -1}, {10, 10}, {20, 160}, {21, 14}, {32, 32}, {111, 122}}; const vector uplo_range = {'U', 'L'}; const vector diag_range = {'N', 'U'}; // it applies on trtri_batched only const vector batch_range = {-1, 1, 100, 1000}; const bool is_fortran[] = {false, true}; /* ===============Google Unit Test==================================================== */ /* ===================================================================== BLAS-3 TRTRI and TRTRI_Batched =================================================================== */ /* ============================Setup Arguments======================================= */ // Please use "class Arguments" (see utility.hpp) to pass parameters to templated testers; // Some routines may not touch/use certain "members" of objects "arg". // like BLAS-1 Scal does not have lda, BLAS-2 GEMV does not have ldb, ldc; // That is fine. These testers & routines will leave untouched members alone. 
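// As a concrete illustration of the {N, lda} convention above (values taken from matrix_size_range, // chosen only as an example): the element {20, 160} expands in setup_trtri_arguments() below to // arg.N = 20 and arg.lda = 160, i.e. a 20x20 triangular matrix stored with leading dimension 160.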
// Do not use std::tuple to directly pass parameters to testers; // if you do, you have to unpack each element with extreme care via "std::get<0>", which is not intuitive // and error-prone Arguments setup_trtri_arguments(trtri_tuple tup) { vector<int> matrix_size = std::get<0>(tup); char uplo = std::get<1>(tup); char diag = std::get<2>(tup); int batch_count = std::get<3>(tup); bool fortran = std::get<4>(tup); Arguments arg; arg.N = matrix_size[0]; arg.lda = matrix_size[1]; arg.uplo = uplo; arg.diag = diag; arg.batch_count = batch_count; arg.fortran = fortran; arg.timing = 0; return arg; } class trtri_gtest : public ::TestWithParam<trtri_tuple> { protected: trtri_gtest() {} virtual ~trtri_gtest() {} virtual void SetUp() {} virtual void TearDown() {} }; #ifndef __HIP_PLATFORM_NVCC__ TEST_P(trtri_gtest, trtri_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg (Arguments), which is then passed to the testing routine. // The Arguments data structure has a physical meaning associated with each member, // while the tuple is non-intuitive. Arguments arg = setup_trtri_arguments(GetParam()); hipblasStatus_t status = testing_trtri<float>(arg); // if not success, then the input argument is problematic, so check the error status if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trtri_gtest, trtri_batched_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg (Arguments), which is then passed to the testing routine. // The Arguments data structure has a physical meaning associated with each member, // while the tuple is non-intuitive. Arguments arg = setup_trtri_arguments(GetParam()); hipblasStatus_t status = testing_trtri_batched<float>(arg); // if not success, then the input argument is problematic, so check the error status if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } TEST_P(trtri_gtest, trtri_strided_batched_float) { // GetParam returns a tuple. The setup routine unpacks the tuple // and initializes arg (Arguments), which is then passed to the testing routine. // The Arguments data structure has a physical meaning associated with each member, // while the tuple is non-intuitive. Arguments arg = setup_trtri_arguments(GetParam()); hipblasStatus_t status = testing_trtri_strided_batched<float>(arg); // if not success, then the input argument is problematic, so check the error status if(status != HIPBLAS_STATUS_SUCCESS) { if(arg.N < 0 || arg.lda < arg.N || arg.batch_count < 0) { EXPECT_EQ(HIPBLAS_STATUS_INVALID_VALUE, status); } else { EXPECT_EQ(HIPBLAS_STATUS_SUCCESS, status); // fail } } } // notice we are using a vector of vectors for the matrix size, and plain vectors for uplo and diag // ValuesIn takes the elements of each range (a vector or a char), Combine forms their Cartesian // product, and each resulting tuple is fed to the parameterized tests // The combinations are { {N, lda}, uplo, diag, batch_count, fortran } // This instantiation mainly tests the scope of matrix_size.
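// As a concrete illustration (one hypothetical element of the Cartesian product, not an exhaustive // list): Combine(...) below yields tuples such as {{10, 10}, 'U', 'N', 100, false}; GetParam() hands // each tuple to trtri_gtest, and setup_trtri_arguments() decodes it into N = 10, lda = 10, // uplo = 'U', diag = 'N', batch_count = 100, fortran = false.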
INSTANTIATE_TEST_SUITE_P(hipblasTrtri, trtri_gtest, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range), ValuesIn(diag_range), ValuesIn(batch_range), ValuesIn(is_fortran))); #endif hipBLAS-rocm-5.5.1/clients/include/000077500000000000000000000000001434647641600170055ustar00rootroot00000000000000hipBLAS-rocm-5.5.1/clients/include/arg_check.h000066400000000000000000000030001434647641600210550ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #pragma once #ifndef _ARG_CHECK_H #define _ARG_CHECK_H #include "hipblas.h" #ifdef GOOGLE_TEST #include "gtest/gtest.h" #endif void verify_hipblas_status_invalid_value(hipblasStatus_t status, const char* message); #endif hipBLAS-rocm-5.5.1/clients/include/argument_model.hpp000066400000000000000000000175351434647641600225330ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #ifndef _ARGUMENT_MODEL_HPP_ #define _ARGUMENT_MODEL_HPP_ #include "hipblas_arguments.hpp" #include #include #include namespace ArgumentLogging { const double NA_value = -1.0; // invalid for time, GFlop, GB } // these aren't static as ArgumentModel is instantiated for many Arg lists void ArgumentModel_set_log_function_name(bool f); bool ArgumentModel_get_log_function_name(); void ArgumentModel_set_log_datatype(bool d); bool ArgumentModel_get_log_datatype(); // ArgumentModel template has a variadic list of argument enums template class ArgumentModel { // Whether model has a particular parameter // TODO: Replace with C++17 fold expression ((Args == param) || ...) static bool has(hipblas_argument param) { return false; } template static bool has(hipblas_argument param, T const& first, Ts const&... rest) { if(param == first) return true; return has(param, rest...); } public: void log_perf(std::stringstream& name_line, std::stringstream& val_line, const Arguments& arg, double gpu_us, double gflops, double gbytes, double norm1, double norm2) { bool has_batch_count = has(e_batch_count, Args...); int batch_count = has_batch_count ? arg.batch_count : 1; int hot_calls = arg.iters < 1 ? 1 : arg.iters; // per/us to per/sec *10^6 double hipblas_gflops = gflops * batch_count * hot_calls / gpu_us * 1e6; double hipblas_GBps = gbytes * batch_count * hot_calls / gpu_us * 1e6; // append performance fields if(name_line.rdbuf()->in_avail()) name_line << ","; name_line << "hipblas-Gflops,hipblas-GB/s,hipblas-us,"; if(val_line.rdbuf()->in_avail()) val_line << ","; val_line << hipblas_gflops << ", " << hipblas_GBps << ", " << gpu_us / hot_calls << ", "; if(arg.unit_check || arg.norm_check) { if(arg.norm_check) { name_line << "norm_error_host_ptr,norm_error_device_ptr,"; val_line << norm1 << ", " << norm2 << ", "; } } } template void log_args(std::ostream& str, const Arguments& arg, double gpu_us, double gflops, double gpu_bytes = 0, double norm1 = 0, double norm2 = 0) { if(arg.iters < 1) return; // warmup test only std::stringstream name_list; std::stringstream value_list; if(ArgumentModel_get_log_function_name()) { auto delim = ","; name_list << "function" << delim; value_list << arg.function << delim; } if(ArgumentModel_get_log_datatype()) { auto delim = ","; name_list << "a_type" << delim; value_list << hipblas_datatype2string(arg.a_type) << delim; name_list << "b_type" << delim; value_list << hipblas_datatype2string(arg.b_type) << delim; name_list << "c_type" << delim; value_list << hipblas_datatype2string(arg.c_type) << delim; name_list << "d_type" << delim; value_list << hipblas_datatype2string(arg.d_type) << delim; name_list << "compute_type" << delim; value_list << hipblas_datatype2string(arg.compute_type) << delim; } // Output (name, value) pairs to name_list and value_list auto print = [&, delim = ""](const char* name, auto&& value) mutable { name_list << delim << name; value_list << delim << value; delim = ","; }; // Args is a parameter pack of type: hipblas_argument... // The hipblas_argument enum values in Args correspond to the function arguments that // will be printed by hipblas_test or hipblas_bench. 
For example, the function: // // hipblas_ddot(hipblas_handle handle, // hipblas_int n, // const double* x, // hipblas_int incx, // const double* y, // hipblas_int incy, // double* result); // will have = // // print is a lambda defined above this comment block // // arg is an instance of the Arguments struct // // apply is a templated lambda for C++17 and a templated fuctor for C++14 // // For hipblas_ddot, the following template specialization of apply will be called: // apply(print, arg, T{}), apply(print, arg, T{}),, apply(print, arg, T{}) // // apply in turn calls print with a string corresponding to the enum, for example "N" and the value of N // #if __cplusplus >= 201703L // C++17 (ArgumentsHelper::apply(print, arg, T{}), ...); #else // C++14. TODO: Remove when C++17 is used (void)(int[]){(ArgumentsHelper::apply{}()(print, arg, T{}), 0)...}; #endif if(arg.timing) log_perf(name_list, value_list, arg, gpu_us, gflops, gpu_bytes, norm1, norm2); str << name_list.str() << "\n" << value_list.str() << std::endl; } void test_name(const Arguments& arg, std::string& name) { std::stringstream name_list; auto sep = "_"; name_list << sep << arg.function; name_list << sep << hipblas_datatype2string(arg.a_type); // Output (name, value) pairs to name_list and value_list auto print = [&](const char* name, auto&& value) mutable { name_list << sep << name << sep << value; }; #if __cplusplus >= 201703L // C++17 (ArgumentsHelper::apply(print, arg, float{}), ...); #else // C++14. TODO: Remove when C++17 is used (void)(int[]){(ArgumentsHelper::apply{}()(print, arg, float{}), 0)...}; #endif std::string params = name_list.str(); std::replace(params.begin(), params.end(), '-', 'n'); std::replace(params.begin(), params.end(), '.', 'p'); name += params; } }; #endif hipBLAS-rocm-5.5.1/clients/include/bytes.hpp000066400000000000000000000260331434647641600206500ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef _HIPBLAS_BYTES_H_ #define _HIPBLAS_BYTES_H_ #include "hipblas.h" /*!\file * \brief provides bandwidth measure as byte counts Basic Linear Algebra Subprograms (BLAS) of * Level 1, 2, 3. Where possible we are using the values of NOP from the legacy BLAS files * [sdcz]blas[23]time.f for byte counts. 
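 *
 * Worked illustration (hypothetical size, not an additional API): copy reads x once and writes y
 * once, so copy_gbyte_count<float>(n) below evaluates to sizeof(float) * 2 * n / 1e9; for
 * n = 1,000,000 that is 0.008 GB transferred per call.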
*/ /* * =========================================================================== * Auxiliary * =========================================================================== */ /* \brief byte counts of SET/GET_MATRIX/_ASYNC calls done in pairs for timing */ template constexpr double set_get_matrix_gbyte_count(int m, int n) { return (sizeof(T) * m * n * 2.0) / 1e9; } /* \brief byte counts of SET/GET_VECTOR/_ASYNC */ template constexpr double set_get_vector_gbyte_count(int n) { // calls done in pairs for timing so x 2.0 return (sizeof(T) * n * 2.0) / 1e9; } /* * =========================================================================== * level 1 BLAS * =========================================================================== */ /* \brief byte counts of ASUM */ template constexpr double asum_gbyte_count(int n) { return (sizeof(T) * n) / 1e9; } /* \brief byte counts of AXPY */ template constexpr double axpy_gbyte_count(int n) { return (sizeof(T) * 3.0 * n) / 1e9; } /* \brief byte counts of COPY */ template constexpr double copy_gbyte_count(int n) { return (sizeof(T) * 2.0 * n) / 1e9; } /* \brief byte counts of DOT */ template constexpr double dot_gbyte_count(int n) { return (sizeof(T) * 2.0 * n) / 1e9; } /* \brief byte counts of iamax/iamin */ template constexpr double iamax_gbyte_count(int n) { return (sizeof(T) * 2.0 * n) / 1e9; } /* \brief byte counts of NRM2 */ template constexpr double nrm2_gbyte_count(int n) { return (sizeof(T) * n) / 1e9; } /* \brief byte counts of ROT */ template constexpr double rot_gbyte_count(int n) { return (sizeof(T) * 4.0 * n) / 1e9; // 2 loads and 2 stores } /* \brief byte counts of ROTM */ template constexpr double rotm_gbyte_count(int n, T flag) { //No load and store operations when flag is set to -2.0 if(flag != -2.0) { return (sizeof(T) * 4.0 * n) / 1e9; //2 loads and 2 stores } else { return 0; } } /* \brief byte counts of SCAL */ template constexpr double scal_gbyte_count(int n) { return (sizeof(T) * 2.0 * n) / 1e9; } /* \brief byte counts of SWAP */ template constexpr double swap_gbyte_count(int n) { return (sizeof(T) * 4.0 * n) / 1e9; } /* * =========================================================================== * level 2 BLAS * =========================================================================== */ inline size_t tri_count(int n) { return size_t(n) * (1 + n) / 2; } /* \brief byte counts of GEMV */ template constexpr double gemv_gbyte_count(hipblasOperation_t transA, int m, int n) { return (sizeof(T) * (m * n + 2 * (transA == HIPBLAS_OP_N ? n : m))) / 1e9; } /* \brief byte counts of GBMV */ template constexpr double gbmv_gbyte_count(hipblasOperation_t transA, int m, int n, int kl, int ku) { size_t dim_x = transA == HIPBLAS_OP_N ? n : m; int k1 = dim_x < kl ? dim_x : kl; int k2 = dim_x < ku ? dim_x : ku; int d1 = ((k1 * dim_x) - (k1 * (k1 + 1) / 2)); int d2 = ((k2 * dim_x) - (k2 * (k2 + 1) / 2)); double num_els = double(d1 + d2 + dim_x); return (sizeof(T) * (num_els)) / 1e9; } /* \brief byte counts of GER */ template constexpr double ger_gbyte_count(int m, int n) { return (sizeof(T) * (m * n + m + n)) / 1e9; } /* \brief byte counts of HBMV */ template constexpr double hbmv_gbyte_count(int n, int k) { int k1 = k < n ? 
k : n; return (sizeof(T) * (n * k1 - ((k1 * (k1 + 1)) / 2.0) + 3 * n)) / 1e9; } /* \brief byte counts of HEMV */ template constexpr double hemv_gbyte_count(int n) { return (sizeof(T) * (((n * (n + 1.0)) / 2.0) + 3.0 * n)) / 1e9; } /* \brief byte counts of HPMV */ template constexpr double hpmv_gbyte_count(int n) { return (sizeof(T) * ((n * (n + 1.0)) / 2.0) + 3.0 * n) / 1e9; } /* \brief byte counts of HPR */ template constexpr double hpr_gbyte_count(int n) { return (sizeof(T) * (tri_count(n) + n)) / 1e9; } /* \brief byte counts of HPR2 */ template constexpr double hpr2_gbyte_count(int n) { return (sizeof(T) * (tri_count(n) + 2.0 * n)) / 1e9; } /* \brief byte counts of SYMV */ template constexpr double symv_gbyte_count(int n) { return (sizeof(T) * (tri_count(n) + n)) / 1e9; } /* \brief byte counts of SPMV */ template constexpr double spmv_gbyte_count(int n) { return (sizeof(T) * (tri_count(n) + n)) / 1e9; } /* \brief byte c ounts of SPR */ template constexpr double spr_gbyte_count(int n) { // read and write of A + read of x return (sizeof(T) * (tri_count(n) * 2 + n)) / 1e9; } /* \brief byte counts of SPR2 */ template constexpr double spr2_gbyte_count(int n) { // read and write of A + read of x and y return (sizeof(T) * (tri_count(n) * 2 + n * 2)) / 1e9; } /* \brief byte counts of SBMV */ template constexpr double sbmv_gbyte_count(int n, int k) { int k1 = k < n ? k : n - 1; return (sizeof(T) * (tri_count(n) - tri_count(n - (k1 + 1)) + n)) / 1e9; } /* \brief byte counts of HER */ template constexpr double her_gbyte_count(int n) { return (sizeof(T) * (tri_count(n) + n)) / 1e9; } /* \brief byte counts of HER2 */ template constexpr double her2_gbyte_count(int n) { return (sizeof(T) * (tri_count(n) + 2 * n)) / 1e9; } /* \brief byte counts of SYR */ template constexpr double syr_gbyte_count(int n) { // read and write of A + read of x return (sizeof(T) * (tri_count(n) * 2 + n)) / 1e9; } /* \brief byte counts of SYR2 */ template constexpr double syr2_gbyte_count(int n) { // read and write of A + read of x and y return (sizeof(T) * (tri_count(n) * 2 + n * 2)) / 1e9; } /* \brief byte counts of TBMV */ template constexpr double tbmv_gbyte_count(int m, int k) { int k1 = k < m ? k : m; return (sizeof(T) * (m * k1 - ((k1 * (k1 + 1)) / 2.0) + 3 * m)) / 1e9; } /* \brief byte counts of TPMV */ template constexpr double tpmv_gbyte_count(int m) { return (sizeof(T) * tri_count(m)) / 1e9; } /* \brief byte counts of TRMV */ template constexpr double trmv_gbyte_count(int m) { return (sizeof(T) * ((m * (m + 1.0)) / 2 + 2 * m)) / 1e9; } /* \brief byte coutns of TBSV */ template constexpr double tbsv_gbyte_count(int n, int k) { int k1 = k < n ? 
k : n; return (sizeof(T) * (n * k1 - ((k1 * (k1 + 1)) / 2.0) + 2 * n)) / 1e9; } /* \brief byte counts of TPSV */ template constexpr double tpsv_gbyte_count(int n) { return (sizeof(T) * (tri_count(n) + n)) / 1e9; } /* \brief byte c ounts or TRSV */ template constexpr double trsv_gbyte_count(int n) { return (sizeof(T) * (tri_count(n) + n)) / 1e9; } /* * =========================================================================== * level 3 BLAS * =========================================================================== */ /* \brief byte counts of GEMM */ template constexpr double gemm_gbyte_count(int m, int n, int k) { return (sizeof(T) * (m * k + n * k + m * n)) / 1e9; } /* \brief byte counts of TRMM */ template constexpr double trmm_gbyte_count(int m, int n, int k) { return (sizeof(T) * (m * n * 2 + k * k / 2)) / 1e9; } /* \brief byte counts of TRSM */ template constexpr double trsm_gbyte_count(int m, int n, int k) { return (sizeof(T) * (tri_count(k) + n * m)) / 1e9; } /* \brief byte counts of SYRK */ template constexpr double syrk_gbyte_count(int n, int k) { return (sizeof(T) * (tri_count(n) + n * k)) / 1e9; } /* \brief byte counts of SYR2K */ template constexpr double syr2k_gbyte_count(int n, int k) { // Read A, B, C, write C return (sizeof(T) * (2 * n * k + 2 * tri_count(n))); } /* \brief byte counts of HERK */ template constexpr double herk_gbyte_count(int n, int k) { return syrk_gbyte_count(n, k); } /* \brief byte counts of SYRKX */ template constexpr double syrkx_gbyte_count(int n, int k) { return (sizeof(T) * (tri_count(n) + 2 * (n * k))) / 1e9; } /* \brief byte counts of HER2K */ template constexpr double her2k_gbyte_count(int n, int k) { return syr2k_gbyte_count(n, k); } /* \brief byte counts of HERKX */ template constexpr double herkx_gbyte_count(int n, int k) { return syrkx_gbyte_count(n, k); } /* \brief byte counts of DGMM */ template constexpr double dgmm_gbyte_count(int n, int m, int k) { // read A, read x, write C return (sizeof(T) * (2 * m * n) + (k)); } /* \brief byte counts of GEAM */ template constexpr double geam_gbyte_count(int n, int m) { // read A, read B, write to C return (sizeof(T) * 3 * m * n); } /* \brief byte counts of HEMM */ template constexpr double hemm_gbyte_count(int n, int m, int k) { // read A, B, C, write C return (sizeof(T) * (3 * m * n + tri_count(k))); } /* \brief byte counts of SYMM */ template constexpr double symm_gbyte_count(int n, int m, int k) { // read A, B, C, write C return (sizeof(T) * (3 * m * n + tri_count(k))); } /* \brief byte counts of TRTRI */ template constexpr double trtri_gbyte_count(int n) { // read A, write invA return (sizeof(T) * (2 * tri_count(n))); } #endif /* _HIPBLAS_BYTES_H_ */ hipBLAS-rocm-5.5.1/clients/include/cblas_interface.h000066400000000000000000000371621434647641600222730ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #pragma once #ifndef _CBLAS_INTERFACE_ #define _CBLAS_INTERFACE_ #include "hipblas.h" /*!\file * \brief provide template functions interfaces to CBLAS C89 interfaces, it is only used for testing * not part of the GPU library */ /* * =========================================================================== * level 1 BLAS * =========================================================================== */ template void cblas_axpy(int n, const Ta alpha, const Tx* x, int incx, Tx* y, int incy); template void cblas_scal(int n, const U alpha, T* x, int incx); template void cblas_copy(int n, T* x, int incx, T* y, int incy); template void cblas_swap(int n, T* x, int incx, T* y, int incy); template void cblas_dot(int n, const T* x, int incx, const T* y, int incy, T* result); template void cblas_dotc(int n, const T* x, int incx, const T* y, int incy, T* result); template void cblas_nrm2(int n, const T1* x, int incx, T2* result); template void cblas_rot(int n, T1* x, int incx, T1* y, int incy, T2 c, T3 s); template void cblas_rotg(T1* a, T1* b, T2* c, T1* s); template void cblas_rotm(int n, T1* x, int incx, T1* y, int incy, T1* param); template void cblas_rotmg(T1* d1, T1* d2, T1* x1, T1* y1, T1* param); template void cblas_asum(int n, const T1* x, int incx, T2* result); template void cblas_iamax(int n, const T* x, int incx, int* result); template void cblas_iamin(int n, const T* x, int incx, int* result); template void cblas_gbmv(hipblasOperation_t transA, int m, int n, int kl, int ku, T alpha, T* A, int lda, T* x, int incx, T beta, T* y, int incy); template void cblas_gemv(hipblasOperation_t transA, int m, int n, T alpha, T* A, int lda, T* x, int incx, T beta, T* y, int incy); template void cblas_symv( hipblasFillMode_t uplo, int n, T alpha, T* A, int lda, T* x, int incx, T beta, T* y, int incy); // ger (ger, geru, gerc) template void cblas_ger(int m, int n, T alpha, T* x, int incx, T* y, int incy, T* A, int lda); // hbmv template void cblas_hbmv(hipblasFillMode_t uplo, int n, int k, T alpha, T* A, int lda, T* x, int incx, T beta, T* y, int incy); // hemv template void cblas_hemv( hipblasFillMode_t uplo, int n, U alpha, T* A, int lda, T* x, int incx, T beta, T* y, int incy); // spr template void cblas_spr(hipblasFillMode_t uplo, int n, T alpha, T* x, int incx, T* AP); // spr2 template void cblas_spr2(hipblasFillMode_t uplo, int n, T alpha, T* x, int incx, T* y, int incy, T* AP); // syr 
template void cblas_syr(hipblasFillMode_t uplo, int n, T alpha, T* x, int incx, T* A, int lda); // syr2 template void cblas_syr2( hipblasFillMode_t uplo, int n, T alpha, T* x, int incx, T* y, int incy, T* A, int lda); // her template void cblas_her(hipblasFillMode_t uplo, int n, U alpha, T* x, int incx, T* A, int lda); // her2 template void cblas_her2( hipblasFillMode_t uplo, int n, T alpha, T* x, int incx, T* y, int incy, T* A, int lda); // hpmv template void cblas_hpmv( hipblasFillMode_t uplo, int n, T alpha, T* AP, T* x, int incx, T beta, T* y, int incy); // hpr template void cblas_hpr(hipblasFillMode_t uplo, int n, U alpha, T* x, int incx, T* AP); // hpr2 template void cblas_hpr2(hipblasFillMode_t uplo, int n, T alpha, T* x, int incx, T* y, int incy, T* AP); // sbmv template void cblas_sbmv(hipblasFillMode_t uplo, int n, int k, T alpha, T* A, int lda, T* x, int incx, T beta, T* y, int incy); // spmv template void cblas_spmv( hipblasFillMode_t uplo, int n, T alpha, T* AP, T* x, int incx, T beta, T* y, int incy); // symv template void cblas_symv( hipblasFillMode_t uplo, int n, T alpha, T* A, int lda, T* x, int incx, T beta, T* y, int incy); // potrf template int cblas_potrf(char uplo, int m, T* A, int lda); // tbmv template void cblas_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const T* A, int lda, T* x, int incx); // tbsv template void cblas_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const T* A, int lda, T* x, int incx); // tpmv template void cblas_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* A, T* x, int incx); // tpsv template void cblas_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, const T* AP, T* x, int incx); // trmv template void cblas_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* A, int lda, T* x, int incx); // trsv template void cblas_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* A, int lda, T* x, int incx); // hemv template void cblas_hemv( hipblasFillMode_t uplo, int n, T alpha, T* A, int lda, T* x, int incx, T beta, T* y, int incy); // herk template void cblas_herk(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, U alpha, T* A, int lda, U beta, T* C, int ldc); // herkx template void cblas_herkx(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, T alpha, T* A, int lda, T* B, int ldb, U beta, T* C, int ldc); // her2k template void cblas_her2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, T alpha, T* A, int lda, T* B, int ldb, U beta, T* C, int ldc); // geam template void cblas_geam(hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, T* alpha, T* A, int lda, T* beta, T* B, int ldb, T* C, int ldc); // gemm template void cblas_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, Tc alpha, Ti* A, int lda, Ti* B, int ldb, Tc beta, To* C, int ldc); // hemm template void cblas_hemm(hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, T alpha, T* A, int lda, T* B, int ldb, T beta, T* C, int ldc); // symm template void cblas_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, T alpha, T* A, int lda, T* B, int ldb, T beta, T* C, int ldc); // syrk template void cblas_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, T alpha, T* A, int lda, T 
beta, T* C, int ldc); // syr2k template void cblas_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, T alpha, T* A, int lda, T* B, int ldb, T beta, T* C, int ldc); // syrkx template void cblas_syrkx(hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, T alpha, T* A, int lda, T* B, int ldb, T beta, T* C, int ldc); // trsm template void cblas_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, T alpha, const T* A, int lda, T* B, int ldb); // trtri template int cblas_trtri(char uplo, char diag, int n, T* A, int lda); // trmm template void cblas_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, T alpha, const T* A, int lda, T* B, int ldb); template int cblas_getrf(int m, int n, T* A, int lda, int* ipiv); template int cblas_getrs(char trans, int n, int nrhs, T* A, int lda, int* ipiv, T* B, int ldb); template int cblas_getri(int n, T* A, int lda, int* ipiv, T* work, int lwork); template int cblas_geqrf(int m, int n, T* A, int lda, T* tau, T* work, int lwork); template int cblas_gels( char trans, int m, int n, int nrhs, T* A, int lda, T* B, int ldb, T* work, int lwork); /* ============================================================================================ */ #endif /* _CBLAS_INTERFACE_ */ hipBLAS-rocm-5.5.1/clients/include/clients_common.hpp000066400000000000000000000027141434647641600225330ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #pragma once #include struct Arguments; void get_test_name(const Arguments& arg, std::string& name); int run_bench_test(Arguments& arg, int unit_check, int timing); hipBLAS-rocm-5.5.1/clients/include/complex.hpp000066400000000000000000000140031434647641600211630ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef HIPBLAS_COMPLEX_HPP #define HIPBLAS_COMPLEX_HPP #include "hipblas.h" #include inline hipblasComplex& operator+=(hipblasComplex& lhs, const hipblasComplex& rhs) { reinterpret_cast&>(lhs) += reinterpret_cast&>(rhs); return lhs; } inline hipblasDoubleComplex& operator+=(hipblasDoubleComplex& lhs, const hipblasDoubleComplex& rhs) { reinterpret_cast&>(lhs) += reinterpret_cast&>(rhs); return lhs; } inline hipblasComplex operator+(hipblasComplex lhs, const hipblasComplex& rhs) { return lhs += rhs; } inline hipblasDoubleComplex operator+(hipblasDoubleComplex lhs, const hipblasDoubleComplex& rhs) { return lhs += rhs; } inline hipblasComplex& operator-=(hipblasComplex& lhs, const hipblasComplex& rhs) { reinterpret_cast&>(lhs) -= reinterpret_cast&>(rhs); return lhs; } inline hipblasDoubleComplex& operator-=(hipblasDoubleComplex& lhs, const hipblasDoubleComplex& rhs) { reinterpret_cast&>(lhs) -= reinterpret_cast&>(rhs); return lhs; } inline hipblasComplex operator-(hipblasComplex lhs, const hipblasComplex& rhs) { return lhs -= rhs; } inline hipblasDoubleComplex operator-(hipblasDoubleComplex lhs, const hipblasDoubleComplex& rhs) { return lhs -= rhs; } inline hipblasComplex& operator*=(hipblasComplex& lhs, const hipblasComplex& rhs) { reinterpret_cast&>(lhs) *= reinterpret_cast&>(rhs); return lhs; } inline hipblasDoubleComplex& operator*=(hipblasDoubleComplex& lhs, const hipblasDoubleComplex& rhs) { reinterpret_cast&>(lhs) *= reinterpret_cast&>(rhs); return lhs; } inline hipblasComplex operator*(hipblasComplex lhs, const hipblasComplex& rhs) { return lhs *= rhs; } inline hipblasDoubleComplex operator*(hipblasDoubleComplex lhs, const hipblasDoubleComplex& rhs) { return lhs *= rhs; } inline hipblasComplex& operator/=(hipblasComplex& lhs, const hipblasComplex& rhs) { reinterpret_cast&>(lhs) /= reinterpret_cast&>(rhs); return lhs; } inline hipblasDoubleComplex& operator/=(hipblasDoubleComplex& lhs, const hipblasDoubleComplex& rhs) { reinterpret_cast&>(lhs) /= reinterpret_cast&>(rhs); return lhs; } inline hipblasComplex operator/(hipblasComplex lhs, const hipblasComplex& rhs) { return lhs /= rhs; } inline hipblasDoubleComplex operator/(hipblasDoubleComplex lhs, const hipblasDoubleComplex& rhs) { return lhs /= rhs; } inline bool operator==(const hipblasComplex& lhs, const hipblasComplex& rhs) { return reinterpret_cast&>(lhs) == reinterpret_cast&>(rhs); } inline bool operator!=(const 
hipblasComplex& lhs, const hipblasComplex& rhs) { return !(lhs == rhs); } inline bool operator==(const hipblasDoubleComplex& lhs, const hipblasDoubleComplex& rhs) { return reinterpret_cast&>(lhs) == reinterpret_cast&>(rhs); } inline bool operator!=(const hipblasDoubleComplex& lhs, const hipblasDoubleComplex& rhs) { return !(lhs == rhs); } inline hipblasComplex operator-(const hipblasComplex& x) { return {-x.real(), -x.imag()}; } inline hipblasDoubleComplex operator-(const hipblasDoubleComplex& x) { return {-x.real(), -x.imag()}; } inline hipblasComplex operator+(const hipblasComplex& x) { return x; } inline hipblasDoubleComplex operator+(const hipblasDoubleComplex& x) { return x; } namespace std { inline float real(const hipblasComplex& z) { return z.real(); } inline double real(const hipblasDoubleComplex& z) { return z.real(); } inline float imag(const hipblasComplex& z) { return z.imag(); } inline double imag(const hipblasDoubleComplex& z) { return z.imag(); } inline hipblasComplex conj(const hipblasComplex& z) { return {z.real(), -z.imag()}; } inline hipblasDoubleComplex conj(const hipblasDoubleComplex& z) { return {z.real(), -z.imag()}; } inline float abs(const hipblasComplex& z) { return abs(reinterpret_cast&>(z)); } inline double abs(const hipblasDoubleComplex& z) { return abs(reinterpret_cast&>(z)); } inline float conj(const float& r) { return r; } inline double conj(const double& r) { return r; } } #endif hipBLAS-rocm-5.5.1/clients/include/d_vector.hpp000066400000000000000000000074601434647641600213320ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #pragma once #include "hipblas.h" #include #include #include /* ============================================================================================ */ /*! \brief base-class to allocate/deallocate device memory */ template class d_vector { protected: size_t size, bytes; inline size_t nmemb() const noexcept { return size; } #ifdef GOOGLE_TEST U guard[PAD]; d_vector(size_t s) : size(s) , bytes((s + PAD * 2) * sizeof(T)) { // Initialize guard with random data if(PAD > 0) { hipblas_init_nan(guard, PAD); } } #else d_vector(size_t s) : size(s) , bytes(s ? 
s * sizeof(T) : sizeof(T)) { } #endif T* device_vector_setup() { T* d; if((hipMalloc)(&d, bytes) != hipSuccess) { static char* lc = setlocale(LC_NUMERIC, ""); fprintf(stderr, "Error allocating %'zu bytes (%zu GB)\n", bytes, bytes >> 30); d = nullptr; } #ifdef GOOGLE_TEST else { if(PAD > 0) { // Copy guard to device memory before allocated memory CHECK_HIP_ERROR(hipMemcpy(d, guard, sizeof(guard), hipMemcpyHostToDevice)); // Point to allocated block d += PAD; // Copy guard to device memory after allocated memory CHECK_HIP_ERROR(hipMemcpy(d + size, guard, sizeof(guard), hipMemcpyHostToDevice)); } } #endif return d; } void device_vector_teardown(T* d) { if(d != nullptr) { #ifdef GOOGLE_TEST if(PAD > 0) { U host[PAD]; // Copy device memory after allocated memory to host CHECK_HIP_ERROR(hipMemcpy(host, d + size, sizeof(guard), hipMemcpyDeviceToHost)); // Make sure no corruption has occurred EXPECT_EQ(memcmp(host, guard, sizeof(guard)), 0); // Point to guard before allocated memory d -= PAD; // Copy device memory after allocated memory to host CHECK_HIP_ERROR(hipMemcpy(host, d, sizeof(guard), hipMemcpyDeviceToHost)); // Make sure no corruption has occurred EXPECT_EQ(memcmp(host, guard, sizeof(guard)), 0); } #endif // Free device memory CHECK_HIP_ERROR((hipFree)(d)); } } }; hipBLAS-rocm-5.5.1/clients/include/device_batch_vector.hpp000066400000000000000000000206511434647641600235040ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ // #pragma once // #include "d_vector.hpp" // #include "hipblas_vector.hpp" #include // // Local declaration of the host strided batch vector. // template class host_batch_vector; //! //! @brief pseudo-vector subclass which uses a batch of device memory pointers and //! - an array of pointers in host memory //! - an array of pointers in device memory //! template class device_batch_vector : private d_vector { public: //! //! @brief Disallow copying. //! device_batch_vector(const device_batch_vector&) = delete; //! //! @brief Disallow assigning. //! device_batch_vector& operator=(const device_batch_vector&) = delete; //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param batch_count The batch count. //! 
explicit device_batch_vector(int n, int inc, int batch_count) : m_n(n) , m_inc(inc ? inc : 1) , m_batch_count(batch_count) , d_vector(size_t(n) * std::abs(inc ? inc : 1)) { if(false == this->try_initialize_memory()) { this->free_memory(); } } //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param stride (UNUSED) The stride. //! @param batch_count The batch count. //! explicit device_batch_vector(int n, int inc, hipblasStride stride, int batch_count) : device_batch_vector(n, inc, batch_count) { } //! //! @brief Constructor (kept for backward compatibility only, to be removed). //! @param batch_count The number of vectors. //! @param size_vector The size of each vectors. //! explicit device_batch_vector(int batch_count, size_t size_vector) : device_batch_vector(size_vector, 1, batch_count) { } //! //! @brief Destructor. //! ~device_batch_vector() { this->free_memory(); } //! //! @brief Returns the length of the vector. //! int n() const { return this->m_n; } //! //! @brief Returns the increment of the vector. //! int inc() const { return this->m_inc; } //! //! @brief Returns the value of batch_count. //! int batch_count() const { return this->m_batch_count; } //! //! @brief Returns the stride value. //! hipblasStride stride() const { return 0; } //! //! @brief Access to device data. //! @return Pointer to the device data. //! T** ptr_on_device() { return this->m_device_data; } //! //! @brief Const access to device data. //! @return Const pointer to the device data. //! const T* const* ptr_on_device() const { return this->m_device_data; } //! //! @brief access to device data. //! @return Const pointer to the device data. //! T* const* const_batch_ptr() { return this->m_device_data; } //! //! @brief Random access. //! @param batch_index The batch index. //! @return Pointer to the array on device. //! T* operator[](int batch_index) { return this->m_data[batch_index]; } //! //! @brief Constant random access. //! @param batch_index The batch index. //! @return Constant pointer to the array on device. //! const T* operator[](int batch_index) const { return this->m_data[batch_index]; } //! //! @brief Const cast of the data on host. //! operator const T* const *() const { return this->m_data; } //! //! @brief Cast of the data on host. //! // clang-format off operator T**() // clang-format on { return this->m_data; } //! //! @brief Tell whether ressources allocation failed. //! explicit operator bool() const { return nullptr != this->m_data; } //! //! @brief Copy from a host batched vector. //! @param that The host_batch_vector to copy. //! hipError_t transfer_from(const host_batch_vector& that) { hipError_t hip_err; // // Copy each vector. // for(int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { if(hipSuccess != (hip_err = hipMemcpy((*this)[batch_index], that[batch_index], sizeof(T) * this->nmemb(), hipMemcpyHostToDevice))) { return hip_err; } } return hipSuccess; } //! //! @brief Check if memory exists. //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. //! hipError_t memcheck() const { if(*this) return hipSuccess; else return hipErrorOutOfMemory; } private: int m_n{}; int m_inc{}; int m_batch_count{}; T** m_data{}; T** m_device_data{}; //! //! @brief Try to allocate the ressources. //! @return true if success false otherwise. //! 
bool try_initialize_memory() { bool success = false; success = (hipSuccess == (hipMalloc)(&this->m_device_data, this->m_batch_count * sizeof(T*))); if(success) { success = (nullptr != (this->m_data = (T**)calloc(this->m_batch_count, sizeof(T*)))); if(success) { for(int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { success = (nullptr != (this->m_data[batch_index] = this->device_vector_setup())); if(!success) { break; } } if(success) { success = (hipSuccess == hipMemcpy(this->m_device_data, this->m_data, sizeof(T*) * this->m_batch_count, hipMemcpyHostToDevice)); } } } return success; } //! //! @brief Free the ressources, as much as we can. //! void free_memory() { if(nullptr != this->m_data) { for(int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { if(nullptr != this->m_data[batch_index]) { this->device_vector_teardown(this->m_data[batch_index]); this->m_data[batch_index] = nullptr; } } free(this->m_data); this->m_data = nullptr; } if(nullptr != this->m_device_data) { auto tmp_device_data = this->m_device_data; this->m_device_data = nullptr; CHECK_HIP_ERROR((hipFree)(tmp_device_data)); } } }; hipBLAS-rocm-5.5.1/clients/include/flops.hpp000066400000000000000000000526601434647641600206520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************/ #ifndef _HIPBLAS_FLOPS_H_ #define _HIPBLAS_FLOPS_H_ #include "hipblas.h" /*!\file * \brief provides Floating point counts of Basic Linear Algebra Subprograms (BLAS) of Level 1, 2, * 3. Where possible we are using the values of NOP from the legacy BLAS files [sdcz]blas[23]time.f * for flop count. 
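 *
 * Worked illustration (hypothetical size, not an additional API): real axpy computes
 * y = alpha * x + y with one multiply and one add per element, so axpy_gflop_count<float>(n)
 * below evaluates to 2 * n / 1e9; for n = 500,000 that is 0.001 GFLOP per call.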
*/ inline size_t sym_tri_count(int n) { return size_t(n) * (1 + n) / 2; } /* * =========================================================================== * level 1 BLAS * =========================================================================== */ // asum template constexpr double asum_gflop_count(int n) { return (2.0 * n) / 1e9; } template <> constexpr double asum_gflop_count(int n) { return (4.0 * n) / 1e9; } template <> constexpr double asum_gflop_count(int n) { return (4.0 * n) / 1e9; } // axpy template constexpr double axpy_gflop_count(int n) { return (2.0 * n) / 1e9; } template <> constexpr double axpy_gflop_count(int n) { return (8.0 * n) / 1e9; // 6 for complex-complex multiply, 2 for c-c add } template <> constexpr double axpy_gflop_count(int n) { return (8.0 * n) / 1e9; } // copy template constexpr double copy_gflop_count(int n) { return (n) / 1e9; // no actual operations but reporting to be consistent } // dot template constexpr double dot_gflop_count(int n) { return (2.0 * n) / 1e9; } template <> constexpr double dot_gflop_count(int n) { return (8.0 * n) / 1e9; // 6 for each c-c multiply, 2 for each c-c add } template <> constexpr double dot_gflop_count(int n) { return (8.0 * n) / 1e9; } template <> constexpr double dot_gflop_count(int n) { return (9.0 * n) / 1e9; // regular dot (8n) + 1n for complex conjugate } template <> constexpr double dot_gflop_count(int n) { return (9.0 * n) / 1e9; } // iamax/iamin template constexpr double iamax_gflop_count(int n) { return (1.0 * n) / 1e9; } // nrm2 template constexpr double nrm2_gflop_count(int n) { return (2.0 * n) / 1e9; } template <> constexpr double nrm2_gflop_count(int n) { return (6.0 * n + 2.0 * n) / 1e9; } template <> constexpr double nrm2_gflop_count(int n) { return nrm2_gflop_count(n); } // rot template constexpr double rot_gflop_count(int n) { return (6.0 * n) / 1e9; //4 real multiplication, 1 addition , 1 subtraction } template <> constexpr double rot_gflop_count(int n) { return (20.0 * n) / 1e9; // (6*2 n for c-c multiply)+(2*2 n for real-complex multiply) + 2n for c-c add + 2n for c-c sub } template <> constexpr double rot_gflop_count(int n) { return (12.0 * n) / 1e9; // (2*4 n for real-complex multiply) + 2n for c-c add + 2n for c-c sub } template <> constexpr double rot_gflop_count(int n) { return (20.0 * n) / 1e9; } template <> constexpr double rot_gflop_count(int n) { return (12.0 * n) / 1e9; } // rotm template constexpr double rotm_gflop_count(int n, Tx flag) { //No floating point operations when flag is set to -2.0 if(flag != -2.0) { if(flag < 0) return (6.0 * n) / 1e9; // 4 real multiplication, 2 addition else return (4.0 * n) / 1e9; // 2 real multiplication, 2 addition } else { return 0; } } // scal template constexpr double scal_gflop_count(int n) { return (1.0 * n) / 1e9; } template <> constexpr double scal_gflop_count(int n) { return (6.0 * n) / 1e9; // 6 for c-c multiply } template <> constexpr double scal_gflop_count(int n) { return (6.0 * n) / 1e9; } template <> constexpr double scal_gflop_count(int n) { return (2.0 * n) / 1e9; // 2 for real-complex multiply } template <> constexpr double scal_gflop_count(int n) { return (2.0 * n) / 1e9; } // swap template constexpr double swap_gflop_count(int n) { return (n) / 1e9; // no actual operations but reporting to be consistent } /* * =========================================================================== * level 2 BLAS * =========================================================================== */ /* \brief floating point counts of tpmv */ template 
constexpr double tpmv_gflop_count(int m) { return (double(m) * m) / 1e9; } template <> constexpr double tpmv_gflop_count(int m) { return (4.0 * m * m) / 1e9; } template <> constexpr double tpmv_gflop_count(int m) { return tpmv_gflop_count(m); } /* \brief floating point counts of trmv */ template constexpr double trmv_gflop_count(int m) { return (double(m) * m) / 1e9; } template <> constexpr double trmv_gflop_count(int m) { return (4.0 * m * m) / 1e9; } template <> constexpr double trmv_gflop_count(int m) { return trmv_gflop_count(m); } /* \brief floating point counts of GBMV */ template constexpr double gbmv_gflop_count(hipblasOperation_t transA, int m, int n, int kl, int ku) { int dim_x = transA == HIPBLAS_OP_N ? n : m; int k1 = dim_x < kl ? dim_x : kl; int k2 = dim_x < ku ? dim_x : ku; // kl and ku ops, plus main diagonal ops double d1 = ((2 * k1 * dim_x) - (k1 * (k1 + 1))) + dim_x; double d2 = ((2 * k2 * dim_x) - (k2 * (k2 + 1))) + 2 * dim_x; // add y operations return (d1 + d2 + 2 * dim_x) / 1e9; } template <> constexpr double gbmv_gflop_count(hipblasOperation_t transA, int m, int n, int kl, int ku) { int dim_x = transA == HIPBLAS_OP_N ? n : m; int k1 = dim_x < kl ? dim_x : kl; int k2 = dim_x < ku ? dim_x : ku; double d1 = 4 * ((2 * k1 * dim_x) - (k1 * (k1 + 1))) + 6 * dim_x; double d2 = 4 * ((2 * k2 * dim_x) - (k2 * (k2 + 1))) + 8 * dim_x; return (d1 + d2 + 8 * dim_x) / 1e9; } template <> constexpr double gbmv_gflop_count(hipblasOperation_t transA, int m, int n, int kl, int ku) { int dim_x = transA == HIPBLAS_OP_N ? n : m; int k1 = dim_x < kl ? dim_x : kl; int k2 = dim_x < ku ? dim_x : ku; double d1 = 4 * ((2 * k1 * dim_x) - (k1 * (k1 + 1))) + 6 * dim_x; double d2 = 4 * ((2 * k2 * dim_x) - (k2 * (k2 + 1))) + 8 * dim_x; return (d1 + d2 + 8 * dim_x) / 1e9; } /* \brief floating point counts of GEMV */ template constexpr double gemv_gflop_count(hipblasOperation_t transA, int m, int n) { return (2.0 * m * n + 2.0 * (transA == HIPBLAS_OP_N ? m : n)) / 1e9; } template <> constexpr double gemv_gflop_count(hipblasOperation_t transA, int m, int n) { return (8.0 * m * n + 6.0 * (transA == HIPBLAS_OP_N ? m : n)) / 1e9; } template <> constexpr double gemv_gflop_count(hipblasOperation_t transA, int m, int n) { return (8.0 * m * n + 6.0 * (transA == HIPBLAS_OP_N ? m : n)) / 1e9; } /* \brief floating point counts of HBMV */ template constexpr double hbmv_gflop_count(int n, int k) { int k1 = k < n ? 
k : n; return (8.0 * ((2 * k1 + 1) * n - k1 * (k1 + 1)) + 8 * n) / 1e9; } /* \brief floating point counts of HEMV */ template constexpr double hemv_gflop_count(int n) { return (8.0 * n * n + 8.0 * n) / 1e9; } /* \brief floating point counts of HER */ template constexpr double her_gflop_count(int n) { return (4.0 * n * n) / 1e9; } /* \brief floating point counts of HER2 */ template constexpr double her2_gflop_count(int n) { return (8.0 * (n + 1) * n) / 1e9; } /* \brief floating point counts of HPMV */ template constexpr double hpmv_gflop_count(int n) { return (8.0 * n * n + 8.0 * n) / 1e9; } /* \brief floating point counts of HPR */ template constexpr double hpr_gflop_count(int n) { return (4.0 * n * n) / 1e9; } /* \brief floating point counts of HPR2 */ template constexpr double hpr2_gflop_count(int n) { return (8.0 * (n + 1) * n) / 1e9; } /* \brief floating point counts or TBSV */ template constexpr double tbsv_gflop_count(int n, int k) { int k1 = std::min(k, n); return ((2.0 * n * k1 - k1 * (k1 + 1)) + n) / 1e9; } template <> constexpr double tbsv_gflop_count(int n, int k) { int k1 = std::min(k, n); return (4.0 * (2.0 * n * k1 - k1 * (k1 + 1)) + 4.0 * n) / 1e9; } template <> constexpr double tbsv_gflop_count(int n, int k) { return tbsv_gflop_count(n, k); } /* \brief floating point counts of TRSV */ template constexpr double trsv_gflop_count(int n) { return (double(n) * n) / 1e9; } template <> constexpr double trsv_gflop_count(int n) { return (4.0 * n * n) / 1e9; } template <> constexpr double trsv_gflop_count(int n) { return trsv_gflop_count(n); } /* \brief floating point counts of TBMV */ template constexpr double tbmv_gflop_count(int m, int k) { int k1 = k < m ? k : m; return ((2.0 * m * k1 - double(k1) * (k1 + 1)) + m) / 1e9; } template <> constexpr double tbmv_gflop_count(int m, int k) { int k1 = k < m ? k : m; return (4.0 * (2.0 * m * k1 - double(k1) * (k1 + 1)) + 4.0 * m) / 1e9; } template <> constexpr double tbmv_gflop_count(int m, int k) { int k1 = k < m ? k : m; return (4.0 * (2.0 * m * k1 - double(k1) * (k1 + 1)) + 4.0 * m) / 1e9; } /* \brief floating point counts of TPSV */ template constexpr double tpsv_gflop_count(int n) { return (double(n) * n) / 1e9; } template <> constexpr double tpsv_gflop_count(int n) { return (4.0 * n * n) / 1e9; } template <> constexpr double tpsv_gflop_count(int n) { return tpsv_gflop_count(n); } /* \brief floating point counts of SY(HE)MV */ template constexpr double symv_gflop_count(int n) { return (2.0 * n * n + 2.0 * n) / 1e9; } template <> constexpr double symv_gflop_count(int n) { return 4.0 * symv_gflop_count(n); } template <> constexpr double symv_gflop_count(int n) { return symv_gflop_count(n); } /* \brief floating point counts of SPMV */ template constexpr double spmv_gflop_count(int n) { return (2.0 * n * n + 2.0 * n) / 1e9; } /* \brief floating point counts of SBMV */ template constexpr double sbmv_gflop_count(int n, int k) { int k1 = k < n ? 
k : n; return (2.0 * ((2.0 * k1 + 1) * n - k1 * (k1 + 1)) + 2.0 * n) / 1e9; } /* \brief floating point counts of SPR */ template constexpr double spr_gflop_count(int n) { return (double(n) * (n + 1.0) + n) / 1e9; } template <> constexpr double spr_gflop_count(int n) { return (6.0 * n + 4.0 * n * (n + 1.0)) / 1e9; } template <> constexpr double spr_gflop_count(int n) { return spr_gflop_count(n); } /* \brief floating point counts of SPR2 */ template constexpr double spr2_gflop_count(int n) { return (2.0 * (n + 1.0) * n + 2.0 * n) / 1e9; } /* \brief floating point counts of GER */ template constexpr double ger_gflop_count(int m, int n) { return (6.0 * (double(m) * n + std::min(m, n)) + 2.0 * m * n) / 1e9; } template <> constexpr double ger_gflop_count(int m, int n) { return ((2.0 * m * n) + std::min(m, n)) / 1e9; } template <> constexpr double ger_gflop_count(int m, int n) { return ger_gflop_count(m, n); } /* \brief floating point counts of SYR */ template constexpr double syr_gflop_count(int n) { return (n * (double(n) + 1.0) + n) / 1e9; } template <> constexpr double syr_gflop_count(int n) { return 4.0 * syr_gflop_count(n); } template <> constexpr double syr_gflop_count(int n) { return syr_gflop_count(n); } /* \brief floating point counts of SYR2 */ template constexpr double syr2_gflop_count(int n) { return (2.0 * (n + 1.0) * n + 2.0 * n) / 1e9; } template <> constexpr double syr2_gflop_count(int n) { return (8.0 * (n + 1.0) * n + 12.0 * n) / 1e9; } template <> constexpr double syr2_gflop_count(int n) { return (8.0 * (n + 1.0) * n + 12.0 * n) / 1e9; } /* * =========================================================================== * level 3 BLAS * =========================================================================== */ /* \brief floating point counts of GEMM */ template constexpr double gemm_gflop_count(int m, int n, int k) { return (2.0 * m * n * k) / 1e9; } template <> constexpr double gemm_gflop_count(int m, int n, int k) { return (8.0 * m * n * k) / 1e9; } template <> constexpr double gemm_gflop_count(int m, int n, int k) { return (8.0 * m * n * k) / 1e9; } /* \brief floating point counts of GEAM */ template constexpr double geam_gflop_count(int m, int n) { return (3.0 * m * n) / 1e9; } template <> constexpr double geam_gflop_count(int m, int n) { return (14.0 * m * n) / 1e9; } template <> constexpr double geam_gflop_count(int m, int n) { return (14.0 * m * n) / 1e9; } /* \brief floating point counts of DGMM */ template constexpr double dgmm_gflop_count(int m, int n) { return (m * n) / 1e9; } template <> constexpr double dgmm_gflop_count(int m, int n) { return (6 * m * n) / 1e9; } template <> constexpr double dgmm_gflop_count(int m, int n) { return (6 * m * n) / 1e9; } /* \brief floating point counts of HEMM */ template constexpr double hemm_gflop_count(int m, int n, int k) { return (8.0 * m * k * n) / 1e9; } /* \brief floating point counts of HERK */ template constexpr double herk_gflop_count(int n, int k) { return (4.0 * n * n * k) / 1e9; } /* \brief floating point counts of HER2K */ template constexpr double her2k_gflop_count(int n, int k) { return (8.0 * n * n * k) / 1e9; } /* \brief floating point counts of HERKX */ template constexpr double herkx_gflop_count(int n, int k) { return (4.0 * n * n * k) / 1e9; } /* \brief floating point counts of SYMM */ template constexpr double symm_gflop_count(int m, int n, int k) { return (2.0 * m * k * n) / 1e9; } template <> constexpr double symm_gflop_count(int m, int n, int k) { return 4.0 * symm_gflop_count(m, n, k); } template <> 
constexpr double symm_gflop_count(int m, int n, int k) { return symm_gflop_count(m, n, k); } /* \brief floating point counts of SYRK */ template constexpr double syrk_gflop_count(int n, int k) { return (1.0 * n * n * k) / 1e9; } template <> constexpr double syrk_gflop_count(int n, int k) { return 4.0 * syrk_gflop_count(n, k); } template <> constexpr double syrk_gflop_count(int n, int k) { return syrk_gflop_count(n, k); } /* \brief floating point counts of SYR2K */ template constexpr double syr2k_gflop_count(int n, int k) { return (2.0 * n * n * k) / 1e9; } template <> constexpr double syr2k_gflop_count(int n, int k) { return 4.0 * syr2k_gflop_count(n, k); } template <> constexpr double syr2k_gflop_count(int n, int k) { return syr2k_gflop_count(n, k); } /* \brief floating point counts of SYRKX */ template constexpr double syrkx_gflop_count(int n, int k) { return (2 * k * sym_tri_count(n)) / 1e9; } template <> constexpr double syrkx_gflop_count(int n, int k) { return 4.0 * syrkx_gflop_count(n, k); } template <> constexpr double syrkx_gflop_count(int n, int k) { return syrkx_gflop_count(n, k); } /* \brief floating point counts of TRSM */ template constexpr double trmm_gflop_count(int m, int n, int k) { return (1.0 * m * n * k) / 1e9; } template <> constexpr double trmm_gflop_count(int m, int n, int k) { return 4.0 * trmm_gflop_count(m, n, k); } template <> constexpr double trmm_gflop_count(int m, int n, int k) { return trmm_gflop_count(m, n, k); } /* \brief floating point counts of TRSM */ template constexpr double trsm_gflop_count(int m, int n, int k) { return (1.0 * m * n * k) / 1e9; } template <> constexpr double trsm_gflop_count(int m, int n, int k) { return 4.0 * trsm_gflop_count(m, n, k); } template <> constexpr double trsm_gflop_count(int m, int n, int k) { return trsm_gflop_count(m, n, k); } /* \brief floating point counts of TRTRI */ template constexpr double trtri_gflop_count(int n) { return (1.0 * n * n * n) / 3e9; } template <> constexpr double trtri_gflop_count(int n) { return (8.0 * n * n * n) / 3e9; } template <> constexpr double trtri_gflop_count(int n) { return (8.0 * n * n * n) / 3e9; } /* * =========================================================================== * Solver * =========================================================================== */ /* \brief floating point counts of GEQRF */ template constexpr double geqrf_gflop_count(int n, int m) { // Calculation is for m == n, using max of m, n for now int k = std::max(m, n); return ((4.0 / 3.0) * k * k * k); } template <> constexpr double geqrf_gflop_count(int n, int m) { return 4.0 * geqrf_gflop_count(n, m); } template <> constexpr double geqrf_gflop_count(int n, int m) { return 4.0 * geqrf_gflop_count(n, m); } /* \brief floating point counts of GETRF */ template constexpr double getrf_gflop_count(int n, int m) { return (m * n * n) / 1e9; } template <> constexpr double getrf_gflop_count(int n, int m) { return 4.0 * getrf_gflop_count(n, m); } template <> constexpr double getrf_gflop_count(int n, int m) { return 4.0 * getrf_gflop_count(n, m); } /* \brief floating point counts of GETRI */ template constexpr double getri_gflop_count(int n) { return ((4.0 / 3.0) * n * n * n) / 1e9; } template <> constexpr double getri_gflop_count(int n) { return 4.0 * getri_gflop_count(n); } template <> constexpr double getri_gflop_count(int n) { return 4.0 * getri_gflop_count(n); } /* \brief floating point counts of GETRS */ template constexpr double getrs_gflop_count(int n, int nrhs) { return (2.0 * n * n * nrhs) / 1e9; } template <> 
constexpr double getrs_gflop_count(int n, int nrhs) { return 4.0 * getrs_gflop_count(n, nrhs); } template <> constexpr double getrs_gflop_count(int n, int nrhs) { return 4.0 * getrs_gflop_count(n, nrhs); } /* \brief floating point counts of GELS */ template constexpr double gels_gflop_count(int m, int n) { // Not using this for now as better to just use exe. time int k = m >= n ? n : m; return ((2 * m * n * n) - ((2.0 / 3.0) * k * k * k)) / 1e9; } template <> constexpr double gels_gflop_count(int m, int n) { return 4 * gels_gflop_count(m, n); } template <> constexpr double gels_gflop_count(int m, int n) { return 4 * gels_gflop_count(m, n); } #endif /* _HIPBLAS_FLOPS_H_ */ hipBLAS-rocm-5.5.1/clients/include/hipblas.hpp000066400000000000000000003360211434647641600211450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #pragma once #ifndef _HIPBLAS_HPP_ #define _HIPBLAS_HPP_ /* library headers */ #include "hipblas.h" /*!\file * \brief hipblasTemplate_api.h provides Basic Linear Algebra Subprograms of Level 1, 2 and 3, * using HIP optimized for AMD HCC-based GPU hardware. This library can also run on CUDA-based * NVIDIA GPUs. * This file exposes C++ templated BLAS interface with only the precision templated. */ /* * =========================================================================== * READEME: Please follow the naming convention * Big case for matrix, e.g. matrix A, B, C GEMM (C = A*B) * Lower case for vector, e.g. 
vector x, y GEMV (y = A*x) * =========================================================================== */ template hipblasStatus_t hipblasScal(hipblasHandle_t handle, int n, const U* alpha, T* x, int incx); template hipblasStatus_t hipblasScalBatched( hipblasHandle_t handle, int n, const U* alpha, T* const x[], int incx, int batch_count); template hipblasStatus_t hipblasScalStridedBatched(hipblasHandle_t handle, int n, const U* alpha, T* x, int incx, hipblasStride stridex, int batch_count); template hipblasStatus_t hipblasCopy(hipblasHandle_t handle, int n, const T* x, int incx, T* y, int incy); template hipblasStatus_t hipblasCopyBatched(hipblasHandle_t handle, int n, const T* const x[], int incx, T* const y[], int incy, int batch_count); template hipblasStatus_t hipblasCopyStridedBatched(hipblasHandle_t handle, int n, const T* x, int incx, hipblasStride stridex, T* y, int incy, hipblasStride stridey, int batch_count); template hipblasStatus_t hipblasSwap(hipblasHandle_t handle, int n, T* x, int incx, T* y, int incy); template hipblasStatus_t hipblasSwapBatched( hipblasHandle_t handle, int n, T* x[], int incx, T* y[], int incy, int batch_count); template hipblasStatus_t hipblasSwapStridedBatched(hipblasHandle_t handle, int n, T* x, int incx, hipblasStride stridex, T* y, int incy, hipblasStride stridey, int batch_count); template hipblasStatus_t hipblasDot( hipblasHandle_t handle, int n, const T* x, int incx, const T* y, int incy, T* result); template hipblasStatus_t hipblasDotc( hipblasHandle_t handle, int n, const T* x, int incx, const T* y, int incy, T* result); template hipblasStatus_t hipblasDotBatched(hipblasHandle_t handle, int n, const T* const x[], int incx, const T* const y[], int incy, int batch_count, T* result); template hipblasStatus_t hipblasDotcBatched(hipblasHandle_t handle, int n, const T* const x[], int incx, const T* const y[], int incy, int batch_count, T* result); template hipblasStatus_t hipblasDotStridedBatched(hipblasHandle_t handle, int n, const T* x, int incx, hipblasStride stridex, const T* y, int incy, hipblasStride stridey, int batch_count, T* result); template hipblasStatus_t hipblasDotcStridedBatched(hipblasHandle_t handle, int n, const T* x, int incx, hipblasStride stridex, const T* y, int incy, hipblasStride stridey, int batch_count, T* result); template hipblasStatus_t hipblasAsum(hipblasHandle_t handle, int n, const T1* x, int incx, T2* result); template hipblasStatus_t hipblasAsumBatched( hipblasHandle_t handle, int n, const T1* const x[], int incx, int batch_count, T2* result); template hipblasStatus_t hipblasAsumStridedBatched(hipblasHandle_t handle, int n, const T1* x, int incx, hipblasStride stridex, int batch_count, T2* result); template hipblasStatus_t hipblasNrm2(hipblasHandle_t handle, int n, const T1* x, int incx, T2* result); template hipblasStatus_t hipblasNrm2Batched( hipblasHandle_t handle, int n, const T1* const x[], int incx, int batch_count, T2* result); template hipblasStatus_t hipblasNrm2StridedBatched(hipblasHandle_t handle, int n, const T1* x, int incx, hipblasStride stridex, int batch_count, T2* result); template hipblasStatus_t hipblasRot( hipblasHandle_t handle, int n, T1* x, int incx, T1* y, int incy, const T2* c, const T3* s); template hipblasStatus_t hipblasRotBatched(hipblasHandle_t handle, int n, T1* const x[], int incx, T1* const y[], int incy, const T2* c, const T3* s, int batch_count); template hipblasStatus_t hipblasRotStridedBatched(hipblasHandle_t handle, int n, T1* x, int incx, hipblasStride stridex, T1* y, int incy, 
hipblasStride stridey, const T2* c, const T3* s, int batch_count); template hipblasStatus_t hipblasRotg(hipblasHandle_t handle, T1* a, T1* b, T2* c, T1* s); template hipblasStatus_t hipblasRotgBatched(hipblasHandle_t handle, T1* const a[], T1* const b[], T2* const c[], T1* const s[], int batch_count); template hipblasStatus_t hipblasRotgStridedBatched(hipblasHandle_t handle, T1* a, hipblasStride stridea, T1* b, hipblasStride strideb, T2* c, hipblasStride stridec, T1* s, hipblasStride strides, int batch_count); template hipblasStatus_t hipblasRotm(hipblasHandle_t handle, int n, T* x, int incx, T* y, int incy, const T* param); template hipblasStatus_t hipblasRotmBatched(hipblasHandle_t handle, int n, T* const x[], int incx, T* const y[], int incy, const T* const param[], int batch_count); template hipblasStatus_t hipblasRotmStridedBatched(hipblasHandle_t handle, int n, T* x, int incx, hipblasStride stridex, T* y, int incy, hipblasStride stridey, const T* param, hipblasStride strideparam, int batch_count); template hipblasStatus_t hipblasRotmg(hipblasHandle_t handle, T* d1, T* d2, T* x1, const T* y1, T* param); template hipblasStatus_t hipblasRotmgBatched(hipblasHandle_t handle, T* const d1[], T* const d2[], T* const x1[], const T* const y1[], T* const param[], int batch_count); template hipblasStatus_t hipblasRotmgStridedBatched(hipblasHandle_t handle, T* d1, hipblasStride stride_d1, T* d2, hipblasStride stride_d2, T* x1, hipblasStride stride_x1, const T* y1, hipblasStride stride_y1, T* param, hipblasStride strideparam, int batch_count); template hipblasStatus_t hipblasIamax(hipblasHandle_t handle, int n, const T* x, int incx, int* result); template hipblasStatus_t hipblasIamaxBatched( hipblasHandle_t handle, int n, const T* const x[], int incx, int batch_count, int* result); template hipblasStatus_t hipblasIamaxStridedBatched(hipblasHandle_t handle, int n, const T* x, int incx, hipblasStride stridex, int batch_count, int* result); template hipblasStatus_t hipblasIamin(hipblasHandle_t handle, int n, const T* x, int incx, int* result); template hipblasStatus_t hipblasIaminBatched( hipblasHandle_t handle, int n, const T* const x[], int incx, int batch_count, int* result); template hipblasStatus_t hipblasIaminStridedBatched(hipblasHandle_t handle, int n, const T* x, int incx, hipblasStride stridex, int batch_count, int* result); template hipblasStatus_t hipblasAxpy( hipblasHandle_t handle, int n, const T* alpha, const T* x, int incx, T* y, int incy); template hipblasStatus_t hipblasAxpyBatched(hipblasHandle_t handle, int n, const T* alpha, const T* const x[], int incx, T* const y[], int incy, int batch_count); template hipblasStatus_t hipblasAxpyStridedBatched(hipblasHandle_t handle, int n, const T* alpha, const T* x, int incx, hipblasStride stridex, T* y, int incy, hipblasStride stridey, int batch_count); // ger template hipblasStatus_t hipblasGer(hipblasHandle_t handle, int m, int n, const T* alpha, const T* x, int incx, const T* y, int incy, T* A, int lda); template hipblasStatus_t hipblasGerBatched(hipblasHandle_t handle, int m, int n, const T* alpha, const T* const x[], int incx, const T* const y[], int incy, T* const A[], int lda, int batch_count); template hipblasStatus_t hipblasGerStridedBatched(hipblasHandle_t handle, int m, int n, const T* alpha, const T* x, int incx, hipblasStride stridex, const T* y, int incy, hipblasStride stridey, T* A, int lda, hipblasStride strideA, int batch_count); // hbmv template hipblasStatus_t hipblasHbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int 
n, int k, const T* alpha, const T* A, int lda, const T* x, int incx, const T* beta, T* y, int incy); template hipblasStatus_t hipblasHbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const T* alpha, const T* const A[], int lda, const T* const x[], int incx, const T* beta, T* const y[], int incy, int batchCount); template hipblasStatus_t hipblasHbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* x, int incx, hipblasStride stridex, const T* beta, T* y, int incy, hipblasStride stridey, int batchCount); // hemv template hipblasStatus_t hipblasHemv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* A, int lda, const T* x, int incx, const T* beta, T* y, int incy); template hipblasStatus_t hipblasHemvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const A[], int lda, const T* const x[], int incx, const T* beta, T* const y[], int incy, int batch_count); template hipblasStatus_t hipblasHemvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* A, int lda, hipblasStride stride_a, const T* x, int incx, hipblasStride stride_x, const T* beta, T* y, int incy, hipblasStride stride_y, int batch_count); // her template hipblasStatus_t hipblasHer(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const U* alpha, const T* x, int incx, T* A, int lda); template hipblasStatus_t hipblasHerBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const U* alpha, const T* const x[], int incx, T* const A[], int lda, int batchCount); template hipblasStatus_t hipblasHerStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const U* alpha, const T* x, int incx, hipblasStride stridex, T* A, int lda, hipblasStride strideA, int batchCount); // her2 template hipblasStatus_t hipblasHer2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, const T* y, int incy, T* A, int lda); template hipblasStatus_t hipblasHer2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const x[], int incx, const T* const y[], int incy, T* const A[], int lda, int batchCount); template hipblasStatus_t hipblasHer2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, hipblasStride stridex, const T* y, int incy, hipblasStride stridey, T* A, int lda, hipblasStride strideA, int batchCount); // hpmv template hipblasStatus_t hipblasHpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* AP, const T* x, int incx, const T* beta, T* y, int incy); template hipblasStatus_t hipblasHpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const AP[], const T* const x[], int incx, const T* beta, T* const y[], int incy, int batchCount); template hipblasStatus_t hipblasHpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* AP, hipblasStride strideAP, const T* x, int incx, hipblasStride stridex, const T* beta, T* y, int incy, hipblasStride stridey, int batchCount); // hpr template hipblasStatus_t hipblasHpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const U* alpha, const T* x, int incx, T* AP); template hipblasStatus_t hipblasHprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const U* alpha, const T* const x[], int incx, T* const AP[], int batchCount); 
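// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the dispatch interface above): how a
// client could invoke the type-templated hipblasHpr wrapper. The device
// pointers dx/dAP, the vector length n, and the omitted error checking are
// assumptions made only for this example; hipblasCreate/hipblasDestroy,
// hipMalloc, and HIPBLAS_FILL_MODE_UPPER are the regular HIP/hipBLAS APIs.
//
//     int             n     = 64;
//     float           alpha = 1.0f;      // HPR takes a real-valued alpha
//     hipblasComplex* dx;                // x: n elements on the device
//     hipblasComplex* dAP;               // packed A: n*(n+1)/2 elements
//     hipMalloc((void**)&dx,  n * sizeof(hipblasComplex));
//     hipMalloc((void**)&dAP, size_t(n) * (n + 1) / 2 * sizeof(hipblasComplex));
//
//     hipblasHandle_t handle;
//     hipblasCreate(&handle);
//     // T = hipblasComplex and U = float are deduced from the arguments,
//     // so this resolves to the complex (Chpr) instantiation.
//     hipblasStatus_t status
//         = hipblasHpr(handle, HIPBLAS_FILL_MODE_UPPER, n, &alpha, dx, 1, dAP);
//     hipblasDestroy(handle);
// ---------------------------------------------------------------------------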
template hipblasStatus_t hipblasHprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const U* alpha, const T* x, int incx, hipblasStride stridex, T* AP, hipblasStride strideAP, int batchCount); // hpr2 template hipblasStatus_t hipblasHpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, const T* y, int incy, T* AP); template hipblasStatus_t hipblasHpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const x[], int incx, const T* const y[], int incy, T* const AP[], int batchCount); template hipblasStatus_t hipblasHpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, hipblasStride stridex, const T* y, int incy, hipblasStride stridey, T* AP, hipblasStride strideAP, int batchCount); // sbmv template hipblasStatus_t hipblasSbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const T* alpha, const T* A, int lda, const T* x, int incx, const T* beta, T* y, int incy); template hipblasStatus_t hipblasSbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const T* alpha, const T* const A[], int lda, const T* const x[], int incx, const T* beta, T* y[], int incy, int batchCount); template hipblasStatus_t hipblasSbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* x, int incx, hipblasStride stridex, const T* beta, T* y, int incy, hipblasStride stridey, int batchCount); // spmv template hipblasStatus_t hipblasSpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* AP, const T* x, int incx, const T* beta, T* y, int incy); template hipblasStatus_t hipblasSpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const AP[], const T* const x[], int incx, const T* beta, T* y[], int incy, int batchCount); template hipblasStatus_t hipblasSpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* AP, hipblasStride strideAP, const T* x, int incx, hipblasStride stridex, const T* beta, T* y, int incy, hipblasStride stridey, int batchCount); // spr template hipblasStatus_t hipblasSpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, T* AP); template hipblasStatus_t hipblasSprBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const x[], int incx, T* const AP[], int batchCount); template hipblasStatus_t hipblasSprStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, hipblasStride stridex, T* AP, hipblasStride strideAP, int batchCount); template hipblasStatus_t hipblasSpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, const T* y, int incy, T* AP); template hipblasStatus_t hipblasSpr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const x[], int incx, const T* const y[], int incy, T* const AP[], int batchCount); template hipblasStatus_t hipblasSpr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, hipblasStride stridex, const T* y, int incy, hipblasStride stridey, T* AP, hipblasStride strideAP, int batchCount); // symv template hipblasStatus_t hipblasSymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* A, int lda, const T* x, int 
incx, const T* beta, T* y, int incy); template hipblasStatus_t hipblasSymvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const A[], int lda, const T* const x[], int incx, const T* beta, T* y[], int incy, int batchCount); template hipblasStatus_t hipblasSymvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* x, int incx, hipblasStride stridex, const T* beta, T* y, int incy, hipblasStride stridey, int batchCount); // syr template hipblasStatus_t hipblasSyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, T* A, int lda); template hipblasStatus_t hipblasSyrBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const x[], int incx, T* const A[], int lda, int batch_count); template hipblasStatus_t hipblasSyrStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, hipblasStride stridex, T* A, int lda, hipblasStride strideA, int batch_count); // syr2 template hipblasStatus_t hipblasSyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, const T* y, int incy, T* A, int lda); template hipblasStatus_t hipblasSyr2Batched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* const x[], int incx, const T* const y[], int incy, T* const A[], int lda, int batchCount); template hipblasStatus_t hipblasSyr2StridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const T* alpha, const T* x, int incx, hipblasStride stridex, const T* y, int incy, hipblasStride stridey, T* A, int lda, hipblasStride strideA, int batchCount); // tbmv template hipblasStatus_t hipblasTbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const T* A, int lda, T* x, int incx); template hipblasStatus_t hipblasTbmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const T* const A[], int lda, T* const x[], int incx, int batch_count); template hipblasStatus_t hipblasTbmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const T* A, int lda, hipblasStride stride_a, T* x, int incx, hipblasStride stride_x, int batch_count); // tbsv template hipblasStatus_t hipblasTbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const T* A, int lda, T* x, int incx); template hipblasStatus_t hipblasTbsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const T* const A[], int lda, T* const x[], int incx, int batchCount); template hipblasStatus_t hipblasTbsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const T* A, int lda, hipblasStride strideA, T* x, int incx, hipblasStride stridex, int batchCount); // tpmv template hipblasStatus_t hipblasTpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* AP, T* x, int incx); template hipblasStatus_t hipblasTpmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* const AP[], T* const x[], int incx, int batchCount); template 
hipblasStatus_t hipblasTpmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* AP, hipblasStride strideAP, T* x, int incx, hipblasStride stridex, int batchCount); // tpsv template hipblasStatus_t hipblasTpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* AP, T* x, int incx); template hipblasStatus_t hipblasTpsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* const AP[], T* const x[], int incx, int batchCount); template hipblasStatus_t hipblasTpsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* AP, hipblasStride strideAP, T* x, int incx, hipblasStride stridex, int batchCount); // trmv template hipblasStatus_t hipblasTrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* A, int lda, T* x, int incx); template hipblasStatus_t hipblasTrmvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* const A[], int lda, T* const x[], int incx, int batch_count); template hipblasStatus_t hipblasTrmvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* A, int lda, hipblasStride stride_a, T* x, int incx, hipblasStride stride_x, int batch_count); // trsv template hipblasStatus_t hipblasTrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* A, int lda, T* x, int incx); // trsv_batched template hipblasStatus_t hipblasTrsvBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* const A[], int lda, T* const x[], int incx, int batch_count); // trsv_strided_batched template hipblasStatus_t hipblasTrsvStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const T* A, int lda, hipblasStride strideA, T* x, int incx, hipblasStride stridex, int batch_count); // gbmv template hipblasStatus_t hipblasGbmv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const T* alpha, const T* A, int lda, const T* x, int incx, const T* beta, T* y, int incy); template hipblasStatus_t hipblasGbmvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const T* alpha, const T* const A[], int lda, const T* const x[], int incx, const T* beta, T* const y[], int incy, int batch_count); template hipblasStatus_t hipblasGbmvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const T* alpha, const T* A, int lda, hipblasStride stride_a, const T* x, int incx, hipblasStride stride_x, const T* beta, T* y, int incy, hipblasStride stride_y, int batch_count); // gemv template hipblasStatus_t hipblasGemv(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const T* alpha, const T* A, int lda, const T* x, int incx, const T* beta, T* y, int incy); template hipblasStatus_t hipblasGemvBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const T* alpha, const T* const A[], int lda, const T* const x[], int incx, const T* beta, T* const y[], int incy, int batch_count); template hipblasStatus_t 
hipblasGemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* x, int incx, hipblasStride stridex, const T* beta, T* y, int incy, hipblasStride stridey, int batch_count); template hipblasStatus_t hipblasGemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const T* alpha, const T* A, int lda, const T* B, int ldb, const T* beta, T* C, int ldc); template hipblasStatus_t hipblasGemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const T* alpha, const T* A, int lda, int bsa, const T* B, int ldb, int bsb, const T* beta, T* C, int ldc, int bsc, int batch_count); template hipblasStatus_t hipblasGemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const T* alpha, const T* const A[], int lda, const T* const B[], int ldb, const T* beta, T* const C[], int ldc, int batch_count); // herk template hipblasStatus_t hipblasHerk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const U* alpha, const T* A, int lda, const U* beta, T* C, int ldc); template hipblasStatus_t hipblasHerkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const U* alpha, const T* const A[], int lda, const U* beta, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasHerkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const U* alpha, const T* A, int lda, hipblasStride strideA, const U* beta, T* C, int ldc, hipblasStride strideC, int batchCount); // her2k template hipblasStatus_t hipblasHer2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, const T* B, int ldb, const U* beta, T* C, int ldc); template hipblasStatus_t hipblasHer2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* const A[], int lda, const T* const B[], int ldb, const U* beta, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasHer2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* B, int ldb, hipblasStride strideB, const U* beta, T* C, int ldc, hipblasStride strideC, int batchCount); // herkx template hipblasStatus_t hipblasHerkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, const T* B, int ldb, const U* beta, T* C, int ldc); template hipblasStatus_t hipblasHerkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* const A[], int lda, const T* const B[], int ldb, const U* beta, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasHerkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* B, int ldb, hipblasStride strideB, const U* beta, T* C, int ldc, hipblasStride strideC, int batchCount); // symm template hipblasStatus_t hipblasSymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const T* alpha, const T* A, int lda, const T* B, int ldb, const 
T* beta, T* C, int ldc); template hipblasStatus_t hipblasSymmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const T* alpha, const T* const A[], int lda, const T* const B[], int ldb, const T* beta, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasSymmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* B, int ldb, hipblasStride strideB, const T* beta, T* C, int ldc, hipblasStride strideC, int batchCount); // syrk template hipblasStatus_t hipblasSyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, const T* beta, T* C, int ldc); template hipblasStatus_t hipblasSyrkBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* const A[], int lda, const T* beta, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasSyrkStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* beta, T* C, int ldc, hipblasStride strideC, int batchCount); // syr2k template hipblasStatus_t hipblasSyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, const T* B, int ldb, const T* beta, T* C, int ldc); template hipblasStatus_t hipblasSyr2kBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* const A[], int lda, const T* const B[], int ldb, const T* beta, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasSyr2kStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* B, int ldb, hipblasStride strideB, const T* beta, T* C, int ldc, hipblasStride strideC, int batchCount); // syrkx template hipblasStatus_t hipblasSyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, const T* B, int ldb, const T* beta, T* C, int ldc); template hipblasStatus_t hipblasSyrkxBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* const A[], int lda, const T* const B[], int ldb, const T* beta, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasSyrkxStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* B, int ldb, hipblasStride strideB, const T* beta, T* C, int ldc, hipblasStride strideC, int batchCount); // geam template hipblasStatus_t hipblasGeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const T* alpha, const T* A, int lda, const T* beta, const T* B, int ldb, T* C, int ldc); template hipblasStatus_t hipblasGeamBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const T* alpha, const T* const A[], int lda, const T* beta, const T* const B[], int ldb, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasGeamStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const T* 
alpha, const T* A, int lda, hipblasStride strideA, const T* beta, const T* B, int ldb, hipblasStride strideB, T* C, int ldc, hipblasStride strideC, int batchCount); // hemm template hipblasStatus_t hipblasHemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const T* alpha, const T* A, int lda, const T* B, int ldb, const T* beta, T* C, int ldc); template hipblasStatus_t hipblasHemmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const T* alpha, const T* const A[], int lda, const T* const B[], int ldb, const T* beta, T* const C[], int ldc, int batchCount); template hipblasStatus_t hipblasHemmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const T* alpha, const T* A, int lda, hipblasStride strideA, const T* B, int ldb, hipblasStride strideB, const T* beta, T* C, int ldc, hipblasStride strideC, int batchCount); // trmm template hipblasStatus_t hipblasTrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const T* alpha, const T* A, int lda, T* B, int ldb); template hipblasStatus_t hipblasTrmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const T* alpha, const T* const A[], int lda, T* const B[], int ldb, int batchCount); template hipblasStatus_t hipblasTrmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const T* alpha, const T* A, int lda, hipblasStride strideA, T* B, int ldb, hipblasStride strideB, int batchCount); // trsm template hipblasStatus_t hipblasTrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const T* alpha, T* A, int lda, T* B, int ldb); template hipblasStatus_t hipblasTrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const T* alpha, T* const A[], int lda, T* B[], int ldb, int batch_count); template hipblasStatus_t hipblasTrsmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const T* alpha, T* A, int lda, hipblasStride strideA, T* B, int ldb, hipblasStride strideB, int batch_count); // getrf template hipblasStatus_t hipblasGetrf(hipblasHandle_t handle, const int n, T* A, const int lda, int* ipiv, int* info); template hipblasStatus_t hipblasGetrfBatched(hipblasHandle_t handle, const int n, T* const A[], const int lda, int* ipiv, int* info, const int batchCount); template hipblasStatus_t hipblasGetrfStridedBatched(hipblasHandle_t handle, const int n, T* A, const int lda, const hipblasStride strideA, int* ipiv, const hipblasStride strideP, int* info, const int batchCount); // getrs template hipblasStatus_t hipblasGetrs(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, T* A, const int lda, const int* ipiv, T* B, const int ldb, int* info); template hipblasStatus_t hipblasGetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, T* const A[], const int lda, const int* ipiv, T* const B[], const int ldb, int* info, const int batchCount); template hipblasStatus_t hipblasGetrsStridedBatched(hipblasHandle_t 
handle, const hipblasOperation_t trans, const int n, const int nrhs, T* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, T* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount); // getri template hipblasStatus_t hipblasGetriBatched(hipblasHandle_t handle, const int n, T* const A[], const int lda, int* ipiv, T* const C[], const int ldc, int* info, const int batchCount); // geqrf template hipblasStatus_t hipblasGeqrf( hipblasHandle_t handle, const int m, const int n, T* A, const int lda, T* ipiv, int* info); template hipblasStatus_t hipblasGeqrfBatched(hipblasHandle_t handle, const int m, const int n, T* const A[], const int lda, T* const ipiv[], int* info, const int batchCount); template hipblasStatus_t hipblasGeqrfStridedBatched(hipblasHandle_t handle, const int m, const int n, T* A, const int lda, const hipblasStride strideA, T* ipiv, const hipblasStride strideP, int* info, const int batchCount); // gels template hipblasStatus_t hipblasGels(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, T* A, const int lda, T* B, const int ldb, int* info, int* deviceInfo); template hipblasStatus_t hipblasGelsBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, T* const A[], const int lda, T* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount); template hipblasStatus_t hipblasGelsStridedBatched(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, T* A, const int lda, const hipblasStride strideA, T* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount); // dgmm template hipblasStatus_t hipblasDgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const T* A, int lda, const T* x, int incx, T* C, int ldc); template hipblasStatus_t hipblasDgmmBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const T* const A[], int lda, const T* const x[], int incx, T* const C[], int ldc, int batch_count); template hipblasStatus_t hipblasDgmmStridedBatched(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const T* A, int lda, hipblasStride stride_A, const T* x, int incx, hipblasStride stride_x, T* C, int ldc, hipblasStride stride_C, int batch_count); // trtri template hipblasStatus_t hipblasTrtri(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, T* A, int lda, T* invA, int ldinvA); template hipblasStatus_t hipblasTrtriBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, T* A[], int lda, T* invA[], int ldinvA, int batch_count); template hipblasStatus_t hipblasTrtriStridedBatched(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, T* A, int lda, hipblasStride stride_A, T* invA, int ldinvA, hipblasStride stride_invA, int batch_count); template hipblasStatus_t hipblasTrtri_trsm(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, T* A, int lda, T* invA); #endif // _ROCBLAS_HPP_ hipBLAS-rocm-5.5.1/clients/include/hipblas_arguments.hpp000066400000000000000000000303541434647641600232320ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #pragma once #ifndef _HIPBLAS_ARGUMENTS_HPP_ #define _HIPBLAS_ARGUMENTS_HPP_ #include "complex.hpp" #include "hipblas.h" #include "hipblas_datatype2string.hpp" #include "utility.h" #include #include #include #include #include #include #include // Predeclare enumerator enum hipblas_argument : int; // conversion helpers template inline T convert_alpha_beta(double r, double i) { return T(r); } template <> inline hipblasHalf convert_alpha_beta(double r, double i) { return float_to_half(r); } template <> inline hipblasComplex convert_alpha_beta(double r, double i) { return hipblasComplex(r, i); } template <> inline hipblasDoubleComplex convert_alpha_beta(double r, double i) { return hipblasDoubleComplex(r, i); } /*! \brief Class used to parse command arguments in both benchmark & gtest */ struct Arguments { // if you add or reorder members you must update FOR_EACH_ARGUMENT macro int M = 128; int N = 128; int K = 128; int KL = 128; int KU = 128; int rows = 128; int cols = 128; int lda = 128; int ldb = 128; int ldc = 128; int ldd = 128; hipblasDatatype_t a_type = HIPBLAS_R_32F; hipblasDatatype_t b_type = HIPBLAS_R_32F; hipblasDatatype_t c_type = HIPBLAS_R_32F; hipblasDatatype_t d_type = HIPBLAS_R_32F; hipblasDatatype_t compute_type = HIPBLAS_R_32F; int incx = 1; int incy = 1; int incd = 1; int incb = 1; double stride_scale = 1.0; hipblasStride stride_a; // stride_a > transA == 'N' ? lda * K : lda * M hipblasStride stride_b; // stride_b > transB == 'N' ? 
ldb * N : ldb * K hipblasStride stride_c; // stride_c > ldc * N hipblasStride stride_d; // stride_d > ldd * N hipblasStride stride_x; hipblasStride stride_y; int start = 1024; int end = 10240; int step = 1000; double alpha = 1.0; double alphai = 0.0; double beta = 0.0; double betai = 0.0; char transA = 'N'; char transB = 'N'; char side = 'L'; char uplo = 'L'; char diag = 'N'; int apiCallCount = 1; int batch_count = 10; bool fortran = false; int norm_check = 0; int unit_check = 1; int timing = 0; int iters = 10; int cold_iters = 2; uint32_t algo; int32_t solution_index; uint32_t flags; char function[64]; char name[64]; char category[64]; int atomics_mode = HIPBLAS_ATOMICS_NOT_ALLOWED; hipblas_initialization initialization = hipblas_initialization::rand_int; // clang-format off // Generic macro which operates over the list of arguments in order of declaration #define FOR_EACH_ARGUMENT(OPER, SEP) \ OPER(M) SEP \ OPER(N) SEP \ OPER(K) SEP \ OPER(KL) SEP \ OPER(KU) SEP \ OPER(rows) SEP \ OPER(cols) SEP \ OPER(lda) SEP \ OPER(ldb) SEP \ OPER(ldc) SEP \ OPER(ldd) SEP \ OPER(a_type) SEP \ OPER(b_type) SEP \ OPER(c_type) SEP \ OPER(d_type) SEP \ OPER(compute_type) SEP \ OPER(incx) SEP \ OPER(incy) SEP \ OPER(incd) SEP \ OPER(incb) SEP \ OPER(stride_scale) SEP \ OPER(stride_a) SEP \ OPER(stride_b) SEP \ OPER(stride_c) SEP \ OPER(stride_d) SEP \ OPER(stride_x) SEP \ OPER(stride_y) SEP \ OPER(start) SEP \ OPER(end) SEP \ OPER(step) SEP \ OPER(alpha) SEP \ OPER(alphai) SEP \ OPER(beta) SEP \ OPER(betai) SEP \ OPER(transA) SEP \ OPER(transB) SEP \ OPER(side) SEP \ OPER(uplo) SEP \ OPER(diag) SEP \ OPER(apiCallCount) SEP \ OPER(batch_count) SEP \ OPER(fortran) SEP \ OPER(norm_check) SEP \ OPER(unit_check) SEP \ OPER(timing) SEP \ OPER(iters) SEP \ OPER(cold_iters) SEP \ OPER(algo) SEP \ OPER(solution_index) SEP \ OPER(flags) SEP \ OPER(function) SEP \ OPER(name) SEP \ OPER(category) SEP \ OPER(atomics_mode) SEP \ OPER(initialization) // clang-format on // Validate input format. static void validate(std::istream& ifs); // Function to print Arguments out to stream in YAML format friend std::ostream& operator<<(std::ostream& str, const Arguments& arg); // Google Tests uses this with std:ostream automatically to dump parameters //friend std::ostream& operator<<(std::ostream& str, const Arguments& arg); // Function to read Arguments data from stream friend std::istream& operator>>(std::istream& str, Arguments& arg); // Convert (alpha, alphai) and (beta, betai) to a particular type // Return alpha, beta adjusted to 0 for when they are NaN template T get_alpha() const { return hipblas_isnan(alpha) || (is_complex && hipblas_isnan(alphai)) ? T(0.0) : convert_alpha_beta(alpha, alphai); } template T get_beta() const { return hipblas_isnan(beta) || (is_complex && hipblas_isnan(betai)) ? T(0.0) : convert_alpha_beta(beta, betai); } private: }; // We make sure that the Arguments struct is C-compatible /* static_assert(std::is_standard_layout{}, "Arguments is not a standard layout type, and thus is " "incompatible with C."); static_assert(std::is_trivial{}, "Arguments is not a trivial type, and thus is " "incompatible with C."); */ // Arguments enumerators // Create // enum hipblas_argument : int {e_M, e_N, e_K, e_KL, ... }; // There is an enum value for each case in FOR_EACH_ARGUMENT. 
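// For example, with the X-macro list above, the CREATE_ENUM helper defined just
// below expands FOR_EACH_ARGUMENT(CREATE_ENUM, ) to (first few entries only)
//     e_M, e_N, e_K, e_KL, e_KU, e_rows, e_cols, e_lda, e_ldb, e_ldc, e_ldd, ...
// so every Arguments member gets a matching e_<name> enumerator in declaration
// order, and the matching ArgumentsHelper::apply specialization further below
// reduces to a lambda that calls func("M", arg.M), func("N", arg.N), and so on.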
// #define CREATE_ENUM(NAME) e_##NAME, enum hipblas_argument : int { FOR_EACH_ARGUMENT(CREATE_ENUM, ) }; #undef CREATE_ENUM // ArgumentsHelper contains a templated lambda apply<> where there is a template // specialization for each line in the CPP macro FOR_EACH_ARGUMENT. For example, // the first lambda is: apply = [](auto&& func, const Arguments& arg, auto){func("M", arg.m);}; // This lambda can be used to print "M" and arg.m. // // alpha and beta are specialized separately, because they need to use get_alpha() or get_beta(). // To prevent multiple definitions of specializations for alpha and beta, the hipblas_argument // enum for alpha and beta are changed to hipblas_argument(-1) and hipblas_argument(-2) during // the FOR_EACH_ARGUMENT loop. Those out-of-range enum values are not used except here, and are // only used so that the FOR_EACH_ARGUMENT loop can be used to loop over all of the arguments. #if __cplusplus >= 201703L // C++17 // ArgumentsHelper contains a templated lambda apply<> where there is a template // specialization for each line in the CPP macro FOR_EACH_ARGUMENT. For example, // the first lambda is: apply = [](auto&& func, const Arguments& arg, auto){func("M", arg.m)} // This lambda can be used to print "M" and arg.m namespace ArgumentsHelper { template static constexpr auto apply = nullptr; // Macro defining specializations for specific arguments // e_alpha and e_beta get turned into negative sentinel value specializations // clang-format off #define APPLY(NAME) \ template <> \ HIPBLAS_CLANG_STATIC constexpr auto \ apply = \ [](auto&& func, const Arguments& arg, auto) { func(#NAME, arg.NAME); } // Specialize apply for each Argument FOR_EACH_ARGUMENT(APPLY, ;); // Specialization for e_alpha template <> HIPBLAS_CLANG_STATIC constexpr auto apply = [](auto&& func, const Arguments& arg, auto T) { func("alpha", arg.get_alpha()); }; // Specialization for e_beta template <> HIPBLAS_CLANG_STATIC constexpr auto apply = [](auto&& func, const Arguments& arg, auto T) { func("beta", arg.get_beta()); }; }; // clang-format on #else // C++14. TODO: Remove when C++17 is used // clang-format off namespace ArgumentsHelper { #define APPLY(NAME) \ template <> \ struct apply \ { \ auto operator()() \ { \ return \ [](auto&& func, const Arguments& arg, auto) \ { \ func(#NAME, arg.NAME); \ }; \ } \ }; template struct apply { }; // Go through every argument and define specializations FOR_EACH_ARGUMENT(APPLY, ;); // Specialization for e_alpha template <> struct apply { auto operator()() { return [](auto&& func, const Arguments& arg, auto T) { func("alpha", arg.get_alpha()); }; } }; // Specialization for e_beta template <> struct apply { auto operator()() { return [](auto&& func, const Arguments& arg, auto T) { func("beta", arg.get_beta()); }; } }; }; // clang-format on #endif #undef APPLY #endif hipBLAS-rocm-5.5.1/clients/include/hipblas_common.yaml000066400000000000000000000275051434647641600226740ustar00rootroot00000000000000# Data types are defined as either aliases to Python-recognized ctypes, # or enums defined with c_int base clases and attributes. 
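# Illustrative reading of the entries below (names taken from this file): the
# alias form `int: c_int` maps the YAML name directly onto the Python ctypes
# type, while the enum form (e.g. hipblasDatatype_t with bases: [ c_int ] and
# attr values f16_r: 150, f32_r: 151, ...) defines a c_int-based type whose
# named attributes are the values referenced later in this file, such as
# a_type: f32_r in the precision anchors.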
Datatypes: - int: c_int - hipblasDatatype_t: bases: [ c_int ] attr: f16_r: 150 f32_r: 151 f64_r: 152 f16_c: 153 f32_c: 154 f64_c: 155 i8_r: 160 u8_r: 161 i32_r: 162 u32_r: 163 i8_c: 164 u8_c: 165 i32_c: 166 u32_c: 167 bf16_r: 168 bf16_c: 169 - { half: f16_r, single: f32_r, double: f64_r } - { half complex: f16_c, single complex: f32_c, double complex: f64_c } - hipblas_initialization: bases: [ c_int ] attr: rand_int: 111 trig_float: 222 hpl: 333 - hipblas_atomics_mode: bases: [ c_int ] attr: atomics_not_allowed: 0 atomics_allowed: 1 Real precisions: &real_precisions - &half_precision { a_type: f16_r, b_type: f16_r, c_type: f16_r, d_type: f16_r, compute_type: f16_r } - &hpa_half_precision { a_type: f16_r, b_type: f16_r, c_type: f16_r, d_type: f16_r, compute_type: f32_r } - &single_precision { a_type: f32_r, b_type: f32_r, c_type: f32_r, d_type: f32_r, compute_type: f32_r } - &double_precision { a_type: f64_r, b_type: f64_r, c_type: f64_r, d_type: f64_r, compute_type: f64_r } - &int8_precision { a_type: i8_r, b_type: i8_r, c_type: i32_r, d_type: i32_r, compute_type: i32_r } - &bf16_precision { a_type: bf16_r, b_type: bf16_r, c_type: bf16_r, d_type: bf16_r, compute_type: bf16_r } - &hpa_bf16_precision { a_type: bf16_r, b_type: bf16_r, c_type: bf16_r, d_type: bf16_r, compute_type: f32_r } C precisions: &single_double_precisions - *single_precision - *double_precision Short simple precisions: &half_single_precisions - *half_precision - *single_precision Short precisions: &int8_half_single_precisions - *int8_precision - *half_precision - *single_precision ML precisions: &hpa_half_single_precisions - *hpa_half_precision - *half_precision - *single_precision Non-int precisions: &hpa_half_single_double_precisions - *hpa_half_precision - *half_precision - *single_precision - *double_precision Simple precisions: &half_single_double_precisions - *half_precision - *single_precision - *double_precision Complex precisions: &complex_precisions - &half_precision_complex { a_type: f16_c, b_type: f16_c, c_type: f16_c, d_type: f16_c, compute_type: f16_c } - &hpa_half_precision_complex { a_type: f16_c, b_type: f16_c, c_type: f16_c, d_type: f16_c, compute_type: f32_c } - &single_precision_complex { a_type: f32_c, b_type: f32_c, c_type: f32_c, d_type: f32_c, compute_type: f32_c } - &double_precision_complex { a_type: f64_c, b_type: f64_c, c_type: f64_c, d_type: f64_c, compute_type: f64_c } - &int8_precision_complex { a_type: i8_c, b_type: i8_c, c_type: i32_c, d_type: i32_c, compute_type: i32_c } - &hpa_bf16_precision_complex { a_type: bf16_c, b_type: bf16_c, c_type: bf16_c, d_type: bf16_c, compute_type: f32_c } Half Precision complex and real: &half_precision_complex_real - *half_precision - *half_precision_complex Hpa Half Precision complex and real: &hpa_half_precision_complex_real - *hpa_half_precision - *hpa_half_precision_complex Single Precision complex and real: &single_precision_complex_real - *single_precision - *single_precision_complex Double Precision complex and real: &double_precision_complex_real - *double_precision - *double_precision_complex int8 Precision complex and real: &int8_precision_complex_real - *int8_precision - *int8_precision_complex hpabf16 Precision complex and real: &hpa_bf16_precision_complex_real - *hpa_bf16_precision - *hpa_bf16_precision_complex C precisions complex: &single_double_precisions_complex - *single_precision_complex - *double_precision_complex C precisions complex and real: &single_double_precisions_complex_real - *single_precision - *double_precision - 
*single_precision_complex - *double_precision_complex Short simple precisions complex and real: &half_single_precisions_complex_real - *half_precision - *single_precision - *half_precision_complex - *single_precision_complex Short precisions complex and real: &int8_half_single_precisions_complex_real - *int8_precision - *half_precision - *single_precision - *int8_precision_complex - *half_precision_complex - *single_precision_complex ML precisions complex and real: &hpa_half_single_precisions_complex_real - *hpa_half_precision - *half_precision - *single_precision - *hpa_half_precision_complex - *half_precision_complex - *single_precision_complex Non-int precisions complex and real: &hpa_half_single_double_precisions_complex_real - *hpa_half_precision - *half_precision - *single_precision - *double_precision - *hpa_half_precision_complex - *half_precision_complex - *single_precision_complex - *double_precision_complex Simple precisions complex and real: &half_single_double_precisions_complex_real - *half_precision - *single_precision - *double_precision - *half_precision_complex - *single_precision_complex - *double_precision_complex ############################################# # Used for Scal # ############################################# Joined precisions: &complex_real_in_complex_out - &single_precision_complex_real_in_complex_out { a_type: f32_c, b_type: f32_r, c_type: f32_r, d_type: f32_c, compute_type: f32_c } - &double_precision_complex_real_in_complex_out { a_type: f64_c, b_type: f64_r, c_type: f64_r, d_type: f64_c, compute_type: f64_c } Single double joined: &single_double_complex_real_in_complex_out - *single_precision_complex_real_in_complex_out - *double_precision_complex_real_in_complex_out ############################################# # Used for Scal_ex # ############################################# Hpa half and half: &hpa_half_half_precisions - *hpa_half_precision - *half_precision ############################################# # Used for rot* # ############################################# Complex real mixed: &complex_real_mixed - &single_precision_complex_real_in_complex_compute { a_type: f32_c, b_type: f32_r, c_type: f32_c, d_type: f32_c, compute_type: f32_c } - &double_precision_complex_real_in_complex_compute { a_type: f64_c, b_type: f64_r, c_type: f64_c, d_type: f64_c, compute_type: f64_c } - &single_precision_complex_real_in_real_compute { a_type: f32_c, b_type: f32_r, c_type: f32_c, d_type: f32_c, compute_type: f32_r } - &double_precision_complex_real_in_real_compute { a_type: f64_c, b_type: f64_r, c_type: f64_c, d_type: f64_c, compute_type: f64_r } - &single_precision_complex_real_c { a_type: f32_c, b_type: f32_c, c_type: f32_r, d_type: f32_c, compute_type: f32_c } - &double_precision_complex_real_c { a_type: f64_c, b_type: f64_c, c_type: f64_r, d_type: f64_c, compute_type: f64_c } rot precisions: &rot_precisions - *single_precision - *double_precision - *single_precision_complex_real_in_complex_compute - *single_precision_complex_real_in_real_compute - *double_precision_complex_real_in_complex_compute - *double_precision_complex_real_in_real_compute rot_ex precisions: &rot_ex_precisions - *single_precision - *double_precision - *single_precision_complex - *double_precision_complex - *hpa_half_precision - *hpa_bf16_precision - *single_precision_complex_real_c - *double_precision_complex_real_c rotg precisions: &rotg_precisions - *single_precision - *double_precision - *single_precision_complex_real_in_complex_compute - 
*double_precision_complex_real_in_complex_compute ############################################# # Used for Dot (quick) # ############################################# Half bfloat single double complex real: &half_bfloat_single_double_complex_real_precisions - *half_precision - *hpa_bf16_precision - *single_precision - *double_precision - *half_precision_complex - *single_precision_complex - *double_precision_complex ############################################# # Used for axpy_ex # ############################################# Hpa single double complex real: &half_single_double_complex_real_precisions - *single_precision - *double_precision - *single_precision_complex - *double_precision_complex - *hpa_half_precision - *half_precision ############################################# # Used for nrm2_ex # ############################################# nrm2_ex precisions: &nrm2_ex_precisions - &half_precision_nrm2 { a_type: f16_r, b_type: f16_r, c_type: f32_r, d_type: f32_r, compute_type: f32_r } - *single_precision - *double_precision - *single_precision_complex_real_in_real_compute - *double_precision_complex_real_in_real_compute nrm2_ex precisions double: &nrm2_ex_precisions_double - *double_precision - *double_precision_complex_real_in_real_compute Arguments: - M: int - N: int - K: int - KL: int - KU: int - rows: int - cols: int - lda: int - ldb: int - ldc: int - ldd: int - a_type: hipblasDatatype_t - b_type: hipblasDatatype_t - c_type: hipblasDatatype_t - d_type: hipblasDatatype_t - compute_type: hipblasDatatype_t - incx: int - incy: int - incd: int - incb: int - stride_scale: c_double - stride_a: c_int64 - stride_b: c_int64 - stride_c: c_int64 - stride_d: c_int64 - stride_x: c_int64 - stride_y: c_int64 - start: int - end: int - step: int - alpha: c_double - alphai: c_double - beta: c_double - betai: c_double - transA: c_char - transB: c_char - side: c_char - uplo: c_char - diag: c_char - call_count: int - batch_count: int - fortran: c_bool - norm_check: int - unit_check: int - timing: int - iters: int - cold_iters: int - algo: c_uint - solution_index: c_int - flags: c_uint - function: c_char*64 - name: c_char*64 - category: c_char*64 - atomics_mode: hipblas_atomics_mode - initialization: hipblas_initialization # - known_bug_platforms: c_char*64 # - c_noalias_d: c_bool # These named dictionary lists [ {dict1}, {dict2}, etc. ] supply subsets of # test arguments in a structured way. The dictionaries are applied to the test # one at a time, to generate combinations. If one of this table's entries is # a dictionary of size one, it means that the argument named by its key takes # on values paired with argument named by its value. For example: # # - function: precision # # when used with the code: # # function: # func1: prec1 # func2: prec2 # func3: prec3 # # causes (function, precision) to take on the values (func1, prec1), # (func2, prec2), (func3, prec3), etc. Dictionary lists to expand: - arguments - transA_transB - alpha_beta - alphai_betai - incx_incy - matrix_size - precision - function: precision # In case an array argument needs to be passed directly to C as an array, # it needs to be listed here to avoid being expanded into multiple test # cases with each of its elements. 
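# --- Illustrative aside (added for clarity; not part of this file) ----------
# The C++ sketch below mimics the two expansion rules described in the comment
# above: plain list-valued entries expand into a cross product of test cases,
# while a size-one dictionary such as "function: precision" keeps its key and
# value paired (func1 with prec1, func2 with prec2, ...). The func1/prec1
# names are the hypothetical examples from the comment above.

#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main()
{
    // A plain list entry: every value becomes its own test case.
    std::vector<std::string> transA = {"N", "T"};

    // A size-one dictionary entry: (function, precision) stay paired and are
    // never crossed with each other.
    std::vector<std::pair<std::string, std::string>> func_prec
        = {{"func1", "prec1"}, {"func2", "prec2"}, {"func3", "prec3"}};

    for(const auto& fp : func_prec)
        for(const auto& op : transA)
            std::cout << "function=" << fp.first << " precision=" << fp.second
                      << " transA=" << op << '\n';
    // 3 pairs x 2 transA values = 6 cases; (func1, prec2) never appears.
    return 0;
}

# --- End of illustrative aside ----------------------------------------------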
Lists to not expand: - e.g., an array argument not to be expanded # Defaults Defaults: M: -1 N: -1 K: -1 KL: -1 KU: -1 rows: 128 cols: 128 incx: 1 incy: 1 incd: 0 incb: 0 stride_scale: 1.0 start: 1024 end: 10240 step: 1000 alpha: 1.0 alphai: 0.0 beta: 0.0 betai: 0.0 transA: '*' transB: '*' side: '*' uplo: '*' diag: '*' call_count: 1 batch_count: -1 fortran: false norm_check: 0 unit_check: 1 timing: 0 iters: 10 cold_iters: 2 algo: 0 solution_index: 0 flags: 0 name: hipblas-bench category: nightly # default benchmarking to faster atomics_allowed (test is default not allowed) atomics_mode: atomics_allowed initialization: rand_int #workspace_size: 0 # known_bug_platforms: '' #c_noalias_d: false hipBLAS-rocm-5.5.1/clients/include/hipblas_data.hpp000066400000000000000000000117411434647641600221350ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * ************************************************************************ */ #pragma once #include "hipblas_arguments.hpp" #include "test_cleanup.hpp" #include #include #include #include #include #include #include #include #include #include #if __has_include() #include namespace fs = std::filesystem; #elif __has_include() #include namespace fs = std::experimental::filesystem; #else #error no filesystem found #endif // Class used to read Arguments data into the tests class HipBLAS_TestData { // data filename static auto& filename() { static std::string filename; return filename; } // filter iterator class iterator : public std::istream_iterator { bool (*const filter)(const Arguments&) = nullptr; // Skip entries for which filter is false void skip_filter() { static auto endIter = std::istream_iterator{}; if(filter) while(*this != endIter && !filter(**this)) ++*static_cast*>(this); } public: // Constructor takes a filter and iterator iterator(bool filter(const Arguments&), std::istream_iterator iter) : std::istream_iterator(iter) , filter(filter) { skip_filter(); } // Default end iterator and nullptr filter iterator() = default; // Preincrement iterator operator with filtering iterator& operator++() { ++*static_cast*>(this); skip_filter(); return *this; } // We do not need a postincrement iterator operator // We delete it here so that the base class's isn't silently called // To implement it, use "auto old = *this; ++*this; return old;" iterator operator++(int) = delete; }; public: // Initialize filename, optionally removing it at exit static void set_filename(std::string name, bool remove_atexit = false) { filename() = std::move(name); if(remove_atexit) { auto cleanup = [] { fs::remove(filename().c_str()); }; atexit(cleanup); at_quick_exit(cleanup); } } // begin() iterator which accepts an optional filter. static iterator begin(bool filter(const Arguments&) = nullptr) { static std::ifstream* ifs = nullptr; // If this is the first time, or after test_cleanup::cleanup() has been called if(!ifs) { std::string fileToOpen = filename(); if(fileToOpen.empty()) return end(); // Allocate a std::ifstream and register it to be deleted during cleanup ifs = test_cleanup::allocate( &ifs, fileToOpen, std::ifstream::in | std::ifstream::binary); if(!ifs || ifs->fail()) { std::cerr << "Cannot open " << fileToOpen << ": " << strerror(errno) << std::endl; exit(EXIT_FAILURE); } } // We re-seek the file back to position 0 ifs->clear(); ifs->seekg(0); // Validate the data file format Arguments::validate(*ifs); // We create a filter iterator which will choose only the test cases we want right now. // This is to preserve Gtest structure while not creating no-op tests which "always pass". return iterator(filter, std::istream_iterator(*ifs)); } // end() iterator static iterator end() { return {}; } }; hipBLAS-rocm-5.5.1/clients/include/hipblas_datatype2string.hpp000066400000000000000000000102251434647641600243440ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef hipblas_DATATYPE2STRING_H_ #define hipblas_DATATYPE2STRING_H_ #include "hipblas.h" #include "hipblas.hpp" #include #include enum hipblas_initialization { rand_int = 111, trig_float = 222, hpl = 333, }; inline constexpr auto hipblas_initialization2string(hipblas_initialization init) { switch(init) { case hipblas_initialization::rand_int: return "rand_int"; case hipblas_initialization::trig_float: return "trig_float"; case hipblas_initialization::hpl: return "hpl"; } return "invalid"; } hipblas_initialization string2hipblas_initialization(const std::string& value); inline std::ostream& operator<<(std::ostream& os, hipblas_initialization init) { return os << hipblas_initialization2string(init); } // Complex output inline std::ostream& operator<<(std::ostream& os, const hipblasComplex& x) { os << "'(" << x.real() << ":" << x.imag() << ")'"; return os; } inline std::ostream& operator<<(std::ostream& os, const hipblasDoubleComplex& x) { os << "'(" << x.real() << ":" << x.imag() << ")'"; return os; } /* ============================================================================================ */ /* Convert hipblas constants to lapack char. */ char hipblas2char_operation(hipblasOperation_t value); char hipblas2char_fill(hipblasFillMode_t value); char hipblas2char_diagonal(hipblasDiagType_t value); char hipblas2char_side(hipblasSideMode_t value); /* ============================================================================================ */ /* Convert lapack char constants to hipblas type. 
*/ hipblasOperation_t char2hipblas_operation(char value); hipblasFillMode_t char2hipblas_fill(char value); hipblasDiagType_t char2hipblas_diagonal(char value); hipblasSideMode_t char2hipblas_side(char value); hipblasDatatype_t string2hipblas_datatype(const std::string& value); // return precision string for hipblas_datatype inline constexpr auto hipblas_datatype2string(hipblasDatatype_t type) { switch(type) { case HIPBLAS_R_16F: return "f16_r"; case HIPBLAS_R_32F: return "f32_r"; case HIPBLAS_R_64F: return "f64_r"; case HIPBLAS_C_16F: return "f16_k"; case HIPBLAS_C_32F: return "f32_c"; case HIPBLAS_C_64F: return "f64_c"; case HIPBLAS_R_8I: return "i8_r"; case HIPBLAS_R_8U: return "u8_r"; case HIPBLAS_R_32I: return "i32_r"; case HIPBLAS_R_32U: return "u32_r"; case HIPBLAS_C_8I: return "i8_c"; case HIPBLAS_C_8U: return "u8_c"; case HIPBLAS_C_32I: return "i32_c"; case HIPBLAS_C_32U: return "u32_c"; case HIPBLAS_R_16B: return "bf16_r"; case HIPBLAS_C_16B: return "bf16_c"; case HIPBLAS_DATATYPE_INVALID: return "invalid"; } return "invalid"; } #endif hipBLAS-rocm-5.5.1/clients/include/hipblas_fortran.f90000066400000000000000000021157721434647641600225210ustar00rootroot00000000000000!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Copyright (c) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. ! ! Permission is hereby granted, free of charge, to any person obtaining a copy ! of this software and associated documentation files (the "Software"), to deal ! in the Software without restriction, including without limitation the rights ! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ! copies of the Software, and to permit persons to whom the Software is ! furnished to do so, subject to the following conditions: ! ! The above copyright notice and this permission notice shall be included in ! all copies or substantial portions of the Software. ! ! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ! THE SOFTWARE. ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! module hipblas_interface use iso_c_binding use hipblas contains !--------! ! Aux ! !--------! 
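! --- Illustrative aside (added for clarity; not part of this file) ----------
! The bind(c) wrappers below exist so that the C/C++ test clients can reach
! the Fortran interfaces through plain C linkage. The C++ sketch that follows
! shows how such a wrapper might be called; the extern "C" prototype is
! hand-written here for illustration (the real clients keep these prototypes
! in their own header) and assumes the usual c_int -> int and
! type(c_ptr) -> void* mappings.

#include <hipblas.h>
#include <vector>

// Hand-written prototype for illustration only; it mirrors the Fortran
// signature below: integer(c_int) scalars and type(c_ptr) buffers by value.
extern "C" hipblasStatus_t hipblasSetVectorFortran(
    int n, int elemSize, const void* x, int incx, void* y, int incy);

// Copy a host vector into a device buffer through the Fortran wrapper.
hipblasStatus_t copy_to_device(const std::vector<float>& host, float* device)
{
    return hipblasSetVectorFortran(static_cast<int>(host.size()),
                                   static_cast<int>(sizeof(float)),
                                   host.data(),
                                   1,
                                   device,
                                   1);
}

! --- End of illustrative aside ----------------------------------------------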
function hipblasSetVectorFortran(n, elemSize, x, incx, y, incy) & bind(c, name='hipblasSetVectorFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSetVectorFortran integer(c_int), value :: n integer(c_int), value :: elemSize type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasSetVectorFortran = & hipblasSetVector(n, elemSize, x, incx, y, incy) end function hipblasSetVectorFortran function hipblasGetVectorFortran(n, elemSize, x, incx, y, incy) & bind(c, name='hipblasGetVectorFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasGetVectorFortran integer(c_int), value :: n integer(c_int), value :: elemSize type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasGetVectorFortran = & hipblasGetVector(n, elemSize, x, incx, y, incy) end function hipblasGetVectorFortran function hipblasSetMatrixFortran(rows, cols, elemSize, A, lda, B, ldb) & bind(c, name='hipblasSetMatrixFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSetMatrixFortran integer(c_int), value :: rows integer(c_int), value :: cols integer(c_int), value :: elemSize type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasSetMatrixFortran = & hipblasSetMatrix(rows, cols, elemSize, A, lda, B, ldb) end function hipblasSetMatrixFortran function hipblasGetMatrixFortran(rows, cols, elemSize, A, lda, B, ldb) & bind(c, name='hipblasGetMatrixFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasGetMatrixFortran integer(c_int), value :: rows integer(c_int), value :: cols integer(c_int), value :: elemSize type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasGetMatrixFortran = & hipblasGetMatrix(rows, cols, elemSize, A, lda, B, ldb) end function hipblasGetMatrixFortran function hipblasSetVectorAsyncFortran(n, elemSize, x, incx, y, incy, stream) & bind(c, name='hipblasSetVectorAsyncFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSetVectorAsyncFortran integer(c_int), value :: n integer(c_int), value :: elemSize type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: stream hipblasSetVectorAsyncFortran = & hipblasSetVectorAsync(n, elemSize, x, incx, y, incy, stream) end function hipblasSetVectorAsyncFortran function hipblasGetVectorAsyncFortran(n, elemSize, x, incx, y, incy, stream) & bind(c, name='hipblasGetVectorAsyncFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasGetVectorAsyncFortran integer(c_int), value :: n integer(c_int), value :: elemSize type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: stream hipblasGetVectorAsyncFortran = & hipblasGetVectorAsync(n, elemSize, x, incx, y, incy, stream) end function hipblasGetVectorAsyncFortran function hipblasSetMatrixAsyncFortran(rows, cols, elemSize, A, lda, B, ldb, stream) & bind(c, name='hipblasSetMatrixAsyncFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSetMatrixAsyncFortran integer(c_int), value :: rows integer(c_int),
value :: cols integer(c_int), value :: elemSize type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: stream hipblasSetMatrixAsyncFortran = & hipblasSetMatrixAsync(rows, cols, elemSize, A, lda, B, ldb, stream) end function hipblasSetMatrixAsyncFortran function hipblasGetMatrixAsyncFortran(rows, cols, elemSize, A, lda, B, ldb, stream) & bind(c, name='hipblasGetMatrixAsyncFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasGetMatrixAsyncFortran integer(c_int), value :: rows integer(c_int), value :: cols integer(c_int), value :: elemSize type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: stream hipblasGetMatrixAsyncFortran = & hipblasGetMatrixAsync(rows, cols, elemSize, A, lda, B, ldb, stream) end function hipblasGetMatrixAsyncFortran function hipblasSetAtomicsModeFortran(handle, atomics_mode) & bind(c, name='hipblasSetAtomicsModeFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSetAtomicsModeFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_ATOMICS_ALLOWED)), value :: atomics_mode hipblasSetAtomicsModeFortran = & hipblasSetAtomicsMode(handle, atomics_mode) end function hipblasSetAtomicsModeFortran function hipblasGetAtomicsModeFortran(handle, atomics_mode) & bind(c, name='hipblasGetAtomicsModeFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasGetAtomicsModeFortran type(c_ptr), value :: handle type(c_ptr), value :: atomics_mode hipblasGetAtomicsModeFortran = & hipblasGetAtomicsMode(handle, atomics_mode) end function hipblasGetAtomicsModeFortran !--------! ! blas 1 ! !--------! ! 
scal function hipblasSscalFortran(handle, n, alpha, x, incx) & bind(c, name='hipblasSscalFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSscalFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx hipblasSscalFortran = & hipblasSscal(handle, n, alpha, x, incx) return end function hipblasSscalFortran function hipblasDscalFortran(handle, n, alpha, x, incx) & bind(c, name='hipblasDscalFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDscalFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx hipblasDscalFortran = & hipblasDscal(handle, n, alpha, x, incx) return end function hipblasDscalFortran function hipblasCscalFortran(handle, n, alpha, x, incx) & bind(c, name='hipblasCscalFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCscalFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx hipblasCscalFortran = & hipblasCscal(handle, n, alpha, x, incx) return end function hipblasCscalFortran function hipblasZscalFortran(handle, n, alpha, x, incx) & bind(c, name='hipblasZscalFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZscalFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx hipblasZscalFortran = & hipblasZscal(handle, n, alpha, x, incx) return end function hipblasZscalFortran function hipblasCsscalFortran(handle, n, alpha, x, incx) & bind(c, name='hipblasCsscalFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsscalFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx hipblasCsscalFortran = & hipblasCsscal(handle, n, alpha, x, incx) return end function hipblasCsscalFortran function hipblasZdscalFortran(handle, n, alpha, x, incx) & bind(c, name='hipblasZdscalFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdscalFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx hipblasZdscalFortran = & hipblasZdscal(handle, n, alpha, x, incx) return end function hipblasZdscalFortran ! 
scalBatched function hipblasSscalBatchedFortran(handle, n, alpha, x, incx, batch_count) & bind(c, name='hipblasSscalBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSscalBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasSscalBatchedFortran = & hipblasSscalBatched(handle, n, alpha, x, incx, batch_count) return end function hipblasSscalBatchedFortran function hipblasDscalBatchedFortran(handle, n, alpha, x, incx, batch_count) & bind(c, name='hipblasDscalBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDscalBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasDscalBatchedFortran = & hipblasDscalBatched(handle, n, alpha, x, incx, batch_count) return end function hipblasDscalBatchedFortran function hipblasCscalBatchedFortran(handle, n, alpha, x, incx, batch_count) & bind(c, name='hipblasCscalBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCscalBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasCscalBatchedFortran = & hipblasCscalBatched(handle, n, alpha, x, incx, batch_count) return end function hipblasCscalBatchedFortran function hipblasZscalBatchedFortran(handle, n, alpha, x, incx, batch_count) & bind(c, name='hipblasZscalBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZscalBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasZscalBatchedFortran = & hipblasZscalBatched(handle, n, alpha, x, incx, batch_count) return end function hipblasZscalBatchedFortran function hipblasCsscalBatchedFortran(handle, n, alpha, x, incx, batch_count) & bind(c, name='hipblasCsscalBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsscalBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasCsscalBatchedFortran = & hipblasCsscalBatched(handle, n, alpha, x, incx, batch_count) return end function hipblasCsscalBatchedFortran function hipblasZdscalBatchedFortran(handle, n, alpha, x, incx, batch_count) & bind(c, name='hipblasZdscalBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdscalBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasZdscalBatchedFortran = & hipblasZdscalBatched(handle, n, alpha, x, incx, batch_count) return end function hipblasZdscalBatchedFortran ! 
scalStridedBatched function hipblasSscalStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, batch_count) & bind(c, name='hipblasSscalStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSscalStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasSscalStridedBatchedFortran = & hipblasSscalStridedBatched(handle, n, alpha, x, incx, stride_x, batch_count) return end function hipblasSscalStridedBatchedFortran function hipblasDscalStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, batch_count) & bind(c, name='hipblasDscalStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDscalStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasDscalStridedBatchedFortran = & hipblasDscalStridedBatched(handle, n, alpha, x, incx, stride_x, batch_count) return end function hipblasDscalStridedBatchedFortran function hipblasCscalStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, batch_count) & bind(c, name='hipblasCscalStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCscalStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasCscalStridedBatchedFortran = & hipblasCscalStridedBatched(handle, n, alpha, x, incx, stride_x, batch_count) return end function hipblasCscalStridedBatchedFortran function hipblasZscalStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, batch_count) & bind(c, name='hipblasZscalStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZscalStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasZscalStridedBatchedFortran = & hipblasZscalStridedBatched(handle, n, alpha, x, incx, stride_x, batch_count) return end function hipblasZscalStridedBatchedFortran function hipblasCsscalStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, batch_count) & bind(c, name='hipblasCsscalStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsscalStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasCsscalStridedBatchedFortran = & hipblasCsscalStridedBatched(handle, n, alpha, x, incx, stride_x, batch_count) return end function hipblasCsscalStridedBatchedFortran function hipblasZdscalStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, batch_count) & bind(c, name='hipblasZdscalStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: 
hipblasZdscalStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasZdscalStridedBatchedFortran = & hipblasZdscalStridedBatched(handle, n, alpha, x, incx, stride_x, batch_count) return end function hipblasZdscalStridedBatchedFortran ! copy function hipblasScopyFortran(handle, n, x, incx, y, incy) & bind(c, name='hipblasScopyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScopyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasScopyFortran = & hipblasScopy(handle, n, x, incx, y, incy) return end function hipblasScopyFortran function hipblasDcopyFortran(handle, n, x, incx, y, incy) & bind(c, name='hipblasDcopyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDcopyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasDcopyFortran = & hipblasDcopy(handle, n, x, incx, y, incy) return end function hipblasDcopyFortran function hipblasCcopyFortran(handle, n, x, incx, y, incy) & bind(c, name='hipblasCcopyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCcopyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasCcopyFortran = & hipblasCcopy(handle, n, x, incx, y, incy) return end function hipblasCcopyFortran function hipblasZcopyFortran(handle, n, x, incx, y, incy) & bind(c, name='hipblasZcopyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZcopyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasZcopyFortran = & hipblasZcopy(handle, n, x, incx, y, incy) return end function hipblasZcopyFortran ! 
copyBatched function hipblasScopyBatchedFortran(handle, n, x, incx, y, incy, batch_count) & bind(c, name='hipblasScopyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScopyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasScopyBatchedFortran = & hipblasScopyBatched(handle, n, x, incx, y, incy, batch_count) return end function hipblasScopyBatchedFortran function hipblasDcopyBatchedFortran(handle, n, x, incx, y, incy, batch_count) & bind(c, name='hipblasDcopyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDcopyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasDcopyBatchedFortran = & hipblasDcopyBatched(handle, n, x, incx, y, incy, batch_count) return end function hipblasDcopyBatchedFortran function hipblasCcopyBatchedFortran(handle, n, x, incx, y, incy, batch_count) & bind(c, name='hipblasCcopyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCcopyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasCcopyBatchedFortran = & hipblasCcopyBatched(handle, n, x, incx, y, incy, batch_count) return end function hipblasCcopyBatchedFortran function hipblasZcopyBatchedFortran(handle, n, x, incx, y, incy, batch_count) & bind(c, name='hipblasZcopyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZcopyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZcopyBatchedFortran = & hipblasZcopyBatched(handle, n, x, incx, y, incy, batch_count) return end function hipblasZcopyBatchedFortran ! 
copyStridedBatched function hipblasScopyStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasScopyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScopyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasScopyStridedBatchedFortran = & hipblasScopyStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasScopyStridedBatchedFortran function hipblasDcopyStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasDcopyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDcopyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasDcopyStridedBatchedFortran = & hipblasDcopyStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasDcopyStridedBatchedFortran function hipblasCcopyStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasCcopyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCcopyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasCcopyStridedBatchedFortran = & hipblasCcopyStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasCcopyStridedBatchedFortran function hipblasZcopyStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasZcopyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZcopyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasZcopyStridedBatchedFortran = & hipblasZcopyStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasZcopyStridedBatchedFortran ! 
dot function hipblasSdotFortran(handle, n, x, incx, y, incy, result) & bind(c, name='hipblasSdotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSdotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: result hipblasSdotFortran = & hipblasSdot(handle, n, x, incx, y, incy, result) return end function hipblasSdotFortran function hipblasDdotFortran(handle, n, x, incx, y, incy, result) & bind(c, name='hipblasDdotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDdotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: result hipblasDdotFortran = & hipblasDdot(handle, n, x, incx, y, incy, result) return end function hipblasDdotFortran function hipblasHdotFortran(handle, n, x, incx, y, incy, result) & bind(c, name='hipblasHdotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHdotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: result hipblasHdotFortran = & hipblasHdot(handle, n, x, incx, y, incy, result) return end function hipblasHdotFortran function hipblasBfdotFortran(handle, n, x, incx, y, incy, result) & bind(c, name='hipblasBfdotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasBfdotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: result hipblasBfdotFortran = & hipblasBfdot(handle, n, x, incx, y, incy, result) return end function hipblasBfdotFortran function hipblasCdotuFortran(handle, n, x, incx, y, incy, result) & bind(c, name='hipblasCdotuFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdotuFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: result hipblasCdotuFortran = & hipblasCdotu(handle, n, x, incx, y, incy, result) return end function hipblasCdotuFortran function hipblasCdotcFortran(handle, n, x, incx, y, incy, result) & bind(c, name='hipblasCdotcFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdotcFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: result hipblasCdotcFortran = & hipblasCdotc(handle, n, x, incx, y, incy, result) return end function hipblasCdotcFortran function hipblasZdotuFortran(handle, n, x, incx, y, incy, result) & bind(c, name='hipblasZdotuFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdotuFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: result hipblasZdotuFortran = & 
hipblasZdotu(handle, n, x, incx, y, incy, result) return end function hipblasZdotuFortran function hipblasZdotcFortran(handle, n, x, incx, y, incy, result) & bind(c, name='hipblasZdotcFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdotcFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: result hipblasZdotcFortran = & hipblasZdotc(handle, n, x, incx, y, incy, result) return end function hipblasZdotcFortran ! dotBatched function hipblasSdotBatchedFortran(handle, n, x, incx, y, incy, batch_count, result) & bind(c, name='hipblasSdotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSdotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasSdotBatchedFortran = & hipblasSdotBatched(handle, n, x, incx, y, incy, batch_count, result) return end function hipblasSdotBatchedFortran function hipblasDdotBatchedFortran(handle, n, x, incx, y, incy, batch_count, result) & bind(c, name='hipblasDdotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDdotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDdotBatchedFortran = & hipblasDdotBatched(handle, n, x, incx, y, incy, batch_count, result) return end function hipblasDdotBatchedFortran function hipblasHdotBatchedFortran(handle, n, x, incx, y, incy, batch_count, result) & bind(c, name='hipblasHdotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHdotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasHdotBatchedFortran = & hipblasHdotBatched(handle, n, x, incx, y, incy, batch_count, result) return end function hipblasHdotBatchedFortran function hipblasBfdotBatchedFortran(handle, n, x, incx, y, incy, batch_count, result) & bind(c, name='hipblasBfdotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasBfdotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasBfdotBatchedFortran = & hipblasBfdotBatched(handle, n, x, incx, y, incy, batch_count, result) return end function hipblasBfdotBatchedFortran function hipblasCdotuBatchedFortran(handle, n, x, incx, y, incy, batch_count, result) & bind(c, name='hipblasCdotuBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdotuBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: 
batch_count type(c_ptr), value :: result hipblasCdotuBatchedFortran = & hipblasCdotuBatched(handle, n, x, incx, y, incy, batch_count, result) return end function hipblasCdotuBatchedFortran function hipblasCdotcBatchedFortran(handle, n, x, incx, y, incy, batch_count, result) & bind(c, name='hipblasCdotcBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdotcBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasCdotcBatchedFortran = & hipblasCdotcBatched(handle, n, x, incx, y, incy, batch_count, result) return end function hipblasCdotcBatchedFortran function hipblasZdotuBatchedFortran(handle, n, x, incx, y, incy, batch_count, result) & bind(c, name='hipblasZdotuBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdotuBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasZdotuBatchedFortran = & hipblasZdotuBatched(handle, n, x, incx, y, incy, batch_count, result) return end function hipblasZdotuBatchedFortran function hipblasZdotcBatchedFortran(handle, n, x, incx, y, incy, batch_count, result) & bind(c, name='hipblasZdotcBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdotcBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasZdotcBatchedFortran = & hipblasZdotcBatched(handle, n, x, incx, y, incy, batch_count, result) return end function hipblasZdotcBatchedFortran ! 
dotStridedBatched function hipblasSdotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) & bind(c, name='hipblasSdotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSdotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasSdotStridedBatchedFortran = & hipblasSdotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) return end function hipblasSdotStridedBatchedFortran function hipblasDdotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) & bind(c, name='hipblasDdotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDdotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDdotStridedBatchedFortran = & hipblasDdotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) return end function hipblasDdotStridedBatchedFortran function hipblasHdotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) & bind(c, name='hipblasHdotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHdotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasHdotStridedBatchedFortran = & hipblasHdotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) return end function hipblasHdotStridedBatchedFortran function hipblasBfdotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) & bind(c, name='hipblasBfdotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasBfdotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasBfdotStridedBatchedFortran = & hipblasBfdotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) return end function hipblasBfdotStridedBatchedFortran function hipblasCdotuStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) & bind(c, name='hipblasCdotuStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdotuStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx 
integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasCdotuStridedBatchedFortran = & hipblasCdotuStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) return end function hipblasCdotuStridedBatchedFortran function hipblasCdotcStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) & bind(c, name='hipblasCdotcStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdotcStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasCdotcStridedBatchedFortran = & hipblasCdotcStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) return end function hipblasCdotcStridedBatchedFortran function hipblasZdotuStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) & bind(c, name='hipblasZdotuStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdotuStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasZdotuStridedBatchedFortran = & hipblasZdotuStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) return end function hipblasZdotuStridedBatchedFortran function hipblasZdotcStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) & bind(c, name='hipblasZdotcStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdotcStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasZdotcStridedBatchedFortran = & hipblasZdotcStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count, result) return end function hipblasZdotcStridedBatchedFortran ! 
swap function hipblasSswapFortran(handle, n, x, incx, y, incy) & bind(c, name='hipblasSswapFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSswapFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasSswapFortran = & hipblasSswap(handle, n, x, incx, y, incy) return end function hipblasSswapFortran function hipblasDswapFortran(handle, n, x, incx, y, incy) & bind(c, name='hipblasDswapFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDswapFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasDswapFortran = & hipblasDswap(handle, n, x, incx, y, incy) return end function hipblasDswapFortran function hipblasCswapFortran(handle, n, x, incx, y, incy) & bind(c, name='hipblasCswapFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCswapFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasCswapFortran = & hipblasCswap(handle, n, x, incx, y, incy) return end function hipblasCswapFortran function hipblasZswapFortran(handle, n, x, incx, y, incy) & bind(c, name='hipblasZswapFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZswapFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasZswapFortran = & hipblasZswap(handle, n, x, incx, y, incy) return end function hipblasZswapFortran ! 
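    ! In the "Batched" variants below, x and y are device arrays of
    ! batch_count device pointers; entry i of each array points to the
    ! i-th vector of the batch. All vectors in a batch share n, incx
    ! and incy.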
swapBatched function hipblasSswapBatchedFortran(handle, n, x, incx, y, incy, batch_count) & bind(c, name='hipblasSswapBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSswapBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasSswapBatchedFortran = & hipblasSswapBatched(handle, n, x, incx, y, incy, batch_count) return end function hipblasSswapBatchedFortran function hipblasDswapBatchedFortran(handle, n, x, incx, y, incy, batch_count) & bind(c, name='hipblasDswapBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDswapBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasDswapBatchedFortran = & hipblasDswapBatched(handle, n, x, incx, y, incy, batch_count) return end function hipblasDswapBatchedFortran function hipblasCswapBatchedFortran(handle, n, x, incx, y, incy, batch_count) & bind(c, name='hipblasCswapBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCswapBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasCswapBatchedFortran = & hipblasCswapBatched(handle, n, x, incx, y, incy, batch_count) return end function hipblasCswapBatchedFortran function hipblasZswapBatchedFortran(handle, n, x, incx, y, incy, batch_count) & bind(c, name='hipblasZswapBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZswapBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZswapBatchedFortran = & hipblasZswapBatched(handle, n, x, incx, y, incy, batch_count) return end function hipblasZswapBatchedFortran ! 
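    ! In the "StridedBatched" variants, each batch lives in one contiguous
    ! allocation: vector i of x starts at element offset i * stride_x (and
    ! likewise for y with stride_y). Strides are given in elements and must
    ! be large enough that consecutive vectors do not overlap, typically at
    ! least n * incx.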
swapStridedBatched function hipblasSswapStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasSswapStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSswapStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasSswapStridedBatchedFortran = & hipblasSswapStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasSswapStridedBatchedFortran function hipblasDswapStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasDswapStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDswapStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasDswapStridedBatchedFortran = & hipblasDswapStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasDswapStridedBatchedFortran function hipblasCswapStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasCswapStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCswapStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasCswapStridedBatchedFortran = & hipblasCswapStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasCswapStridedBatchedFortran function hipblasZswapStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasZswapStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZswapStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasZswapStridedBatchedFortran = & hipblasZswapStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasZswapStridedBatchedFortran ! 
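    ! axpy computes y := alpha * x + y for the H (half), S, D, C and Z types.
    ! alpha is passed by pointer; with the default HIPBLAS_POINTER_MODE_HOST
    ! it is read from host memory, otherwise from device memory.
    !
    ! Minimal calling sketch (illustrative only; assumes a handle created
    ! with hipblasCreate and device buffers dx, dy of length n obtained from
    ! hipMalloc and filled with hipMemcpy):
    !
    !     real(c_float), target :: alpha
    !     integer(kind(HIPBLAS_STATUS_SUCCESS)) :: status
    !     alpha  = 2.0
    !     status = hipblasSaxpyFortran(handle, n, c_loc(alpha), dx, 1, dy, 1)
    !
    ! On success, status equals HIPBLAS_STATUS_SUCCESS and dy holds
    ! alpha * dx + dy.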
axpy function hipblasHaxpyFortran(handle, n, alpha, x, incx, y, incy) & bind(c, name='hipblasHaxpyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHaxpyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasHaxpyFortran = & hipblasHaxpy(handle, n, alpha, x, incx, y, incy) return end function hipblasHaxpyFortran function hipblasSaxpyFortran(handle, n, alpha, x, incx, y, incy) & bind(c, name='hipblasSaxpyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSaxpyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasSaxpyFortran = & hipblasSaxpy(handle, n, alpha, x, incx, y, incy) return end function hipblasSaxpyFortran function hipblasDaxpyFortran(handle, n, alpha, x, incx, y, incy) & bind(c, name='hipblasDaxpyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDaxpyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasDaxpyFortran = & hipblasDaxpy(handle, n, alpha, x, incx, y, incy) return end function hipblasDaxpyFortran function hipblasCaxpyFortran(handle, n, alpha, x, incx, y, incy) & bind(c, name='hipblasCaxpyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCaxpyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasCaxpyFortran = & hipblasCaxpy(handle, n, alpha, x, incx, y, incy) return end function hipblasCaxpyFortran function hipblasZaxpyFortran(handle, n, alpha, x, incx, y, incy) & bind(c, name='hipblasZaxpyFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZaxpyFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy hipblasZaxpyFortran = & hipblasZaxpy(handle, n, alpha, x, incx, y, incy) return end function hipblasZaxpyFortran ! 
axpyBatched function hipblasHaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count) & bind(c, name='hipblasHaxpyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHaxpyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasHaxpyBatchedFortran = & hipblasHaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count) return end function hipblasHaxpyBatchedFortran function hipblasSaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count) & bind(c, name='hipblasSaxpyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSaxpyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasSaxpyBatchedFortran = & hipblasSaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count) return end function hipblasSaxpyBatchedFortran function hipblasDaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count) & bind(c, name='hipblasDaxpyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDaxpyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasDaxpyBatchedFortran = & hipblasDaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count) return end function hipblasDaxpyBatchedFortran function hipblasCaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count) & bind(c, name='hipblasCaxpyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCaxpyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasCaxpyBatchedFortran = & hipblasCaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count) return end function hipblasCaxpyBatchedFortran function hipblasZaxpyBatchedFortran(handle, n, alpha, x, incx, y, incy, batch_count) & bind(c, name='hipblasZaxpyBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZaxpyBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZaxpyBatchedFortran = & hipblasZaxpyBatched(handle, n, alpha, x, incx, y, incy, batch_count) return end function hipblasZaxpyBatchedFortran ! 
axpyStridedBatched function hipblasHaxpyStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasHaxpyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHaxpyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasHaxpyStridedBatchedFortran = & hipblasHaxpyStridedBatched(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasHaxpyStridedBatchedFortran function hipblasSaxpyStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasSaxpyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSaxpyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasSaxpyStridedBatchedFortran = & hipblasSaxpyStridedBatched(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasSaxpyStridedBatchedFortran function hipblasDaxpyStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasDaxpyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDaxpyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasDaxpyStridedBatchedFortran = & hipblasDaxpyStridedBatched(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasDaxpyStridedBatchedFortran function hipblasCaxpyStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasCaxpyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCaxpyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasCaxpyStridedBatchedFortran = & hipblasCaxpyStridedBatched(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasCaxpyStridedBatchedFortran function hipblasZaxpyStridedBatchedFortran(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) & bind(c, name='hipblasZaxpyStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZaxpyStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x 
integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasZaxpyStridedBatchedFortran = & hipblasZaxpyStridedBatched(handle, n, alpha, x, incx, stride_x, y, incy, stride_y, batch_count) return end function hipblasZaxpyStridedBatchedFortran ! asum function hipblasSasumFortran(handle, n, x, incx, result) & bind(c, name='hipblasSasumFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSasumFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasSasumFortran = & hipblasSasum(handle, n, x, incx, result) return end function hipblasSasumFortran function hipblasDasumFortran(handle, n, x, incx, result) & bind(c, name='hipblasDasumFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDasumFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasDasumFortran = & hipblasDasum(handle, n, x, incx, result) return end function hipblasDasumFortran function hipblasScasumFortran(handle, n, x, incx, result) & bind(c, name='hipblasScasumFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScasumFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasScasumFortran = & hipblasScasum(handle, n, x, incx, result) return end function hipblasScasumFortran function hipblasDzasumFortran(handle, n, x, incx, result) & bind(c, name='hipblasDzasumFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDzasumFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasDzasumFortran = & hipblasDzasum(handle, n, x, incx, result) return end function hipblasDzasumFortran ! 
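    ! The asum family returns the sum of absolute values of x,
    !     result = |x(1)| + |x(2)| + ... + |x(n)|,
    ! where for the complex variants (Scasum, Dzasum) each term is
    ! |Re(x(i))| + |Im(x(i))|. In the batched and strided-batched forms,
    ! result refers to an array of batch_count values, one per vector.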
asumBatched function hipblasSasumBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasSasumBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSasumBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasSasumBatchedFortran = & hipblasSasumBatched(handle, n, x, incx, batch_count, result) return end function hipblasSasumBatchedFortran function hipblasDasumBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasDasumBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDasumBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDasumBatchedFortran = & hipblasDasumBatched(handle, n, x, incx, batch_count, result) return end function hipblasDasumBatchedFortran function hipblasScasumBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasScasumBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScasumBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasScasumBatchedFortran = & hipblasScasumBatched(handle, n, x, incx, batch_count, result) return end function hipblasScasumBatchedFortran function hipblasDzasumBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasDzasumBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDzasumBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDzasumBatchedFortran = & hipblasDzasumBatched(handle, n, x, incx, batch_count, result) return end function hipblasDzasumBatchedFortran ! 
asumStridedBatched function hipblasSasumStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasSasumStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSasumStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasSasumStridedBatchedFortran = & hipblasSasumStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasSasumStridedBatchedFortran function hipblasDasumStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasDasumStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDasumStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDasumStridedBatchedFortran = & hipblasDasumStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasDasumStridedBatchedFortran function hipblasScasumStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasScasumStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScasumStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasScasumStridedBatchedFortran = & hipblasScasumStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasScasumStridedBatchedFortran function hipblasDzasumStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasDzasumStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDzasumStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDzasumStridedBatchedFortran = & hipblasDzasumStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasDzasumStridedBatchedFortran ! 
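    ! nrm2 returns the Euclidean norm of x,
    !     result = sqrt(|x(1)|**2 + ... + |x(n)|**2),
    ! with Scnrm2 / Dznrm2 returning a real norm for complex input.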
nrm2 function hipblasSnrm2Fortran(handle, n, x, incx, result) & bind(c, name='hipblasSnrm2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSnrm2Fortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasSnrm2Fortran = & hipblasSnrm2(handle, n, x, incx, result) return end function hipblasSnrm2Fortran function hipblasDnrm2Fortran(handle, n, x, incx, result) & bind(c, name='hipblasDnrm2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDnrm2Fortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasDnrm2Fortran = & hipblasDnrm2(handle, n, x, incx, result) return end function hipblasDnrm2Fortran function hipblasScnrm2Fortran(handle, n, x, incx, result) & bind(c, name='hipblasScnrm2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScnrm2Fortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasScnrm2Fortran = & hipblasScnrm2(handle, n, x, incx, result) return end function hipblasScnrm2Fortran function hipblasDznrm2Fortran(handle, n, x, incx, result) & bind(c, name='hipblasDznrm2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDznrm2Fortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasDznrm2Fortran = & hipblasDznrm2(handle, n, x, incx, result) return end function hipblasDznrm2Fortran ! 
nrm2Batched function hipblasSnrm2BatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasSnrm2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSnrm2BatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasSnrm2BatchedFortran = & hipblasSnrm2Batched(handle, n, x, incx, batch_count, result) return end function hipblasSnrm2BatchedFortran function hipblasDnrm2BatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasDnrm2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDnrm2BatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDnrm2BatchedFortran = & hipblasDnrm2Batched(handle, n, x, incx, batch_count, result) return end function hipblasDnrm2BatchedFortran function hipblasScnrm2BatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasScnrm2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScnrm2BatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasScnrm2BatchedFortran = & hipblasScnrm2Batched(handle, n, x, incx, batch_count, result) return end function hipblasScnrm2BatchedFortran function hipblasDznrm2BatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasDznrm2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDznrm2BatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDznrm2BatchedFortran = & hipblasDznrm2Batched(handle, n, x, incx, batch_count, result) return end function hipblasDznrm2BatchedFortran ! 
nrm2StridedBatched function hipblasSnrm2StridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasSnrm2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSnrm2StridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasSnrm2StridedBatchedFortran = & hipblasSnrm2StridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasSnrm2StridedBatchedFortran function hipblasDnrm2StridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasDnrm2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDnrm2StridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDnrm2StridedBatchedFortran = & hipblasDnrm2StridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasDnrm2StridedBatchedFortran function hipblasScnrm2StridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasScnrm2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScnrm2StridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasScnrm2StridedBatchedFortran = & hipblasScnrm2StridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasScnrm2StridedBatchedFortran function hipblasDznrm2StridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasDznrm2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDznrm2StridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasDznrm2StridedBatchedFortran = & hipblasDznrm2StridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasDznrm2StridedBatchedFortran ! 
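    ! amax returns the index of the first element of x with the largest
    ! absolute value (|Re| + |Im| for the complex types). Following the BLAS
    ! convention the returned index is 1-based; result points to an integer
    ! on the host or device according to the pointer mode.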
amax function hipblasIsamaxFortran(handle, n, x, incx, result) & bind(c, name='hipblasIsamaxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIsamaxFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasIsamaxFortran = & hipblasIsamax(handle, n, x, incx, result) return end function hipblasIsamaxFortran function hipblasIdamaxFortran(handle, n, x, incx, result) & bind(c, name='hipblasIdamaxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIdamaxFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasIdamaxFortran = & hipblasIdamax(handle, n, x, incx, result) return end function hipblasIdamaxFortran function hipblasIcamaxFortran(handle, n, x, incx, result) & bind(c, name='hipblasIcamaxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIcamaxFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasIcamaxFortran = & hipblasIcamax(handle, n, x, incx, result) return end function hipblasIcamaxFortran function hipblasIzamaxFortran(handle, n, x, incx, result) & bind(c, name='hipblasIzamaxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIzamaxFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasIzamaxFortran = & hipblasIzamax(handle, n, x, incx, result) return end function hipblasIzamaxFortran ! 
amaxBatched function hipblasIsamaxBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasIsamaxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIsamaxBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIsamaxBatchedFortran = & hipblasIsamaxBatched(handle, n, x, incx, batch_count, result) return end function hipblasIsamaxBatchedFortran function hipblasIdamaxBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasIdamaxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIdamaxBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIdamaxBatchedFortran = & hipblasIdamaxBatched(handle, n, x, incx, batch_count, result) return end function hipblasIdamaxBatchedFortran function hipblasIcamaxBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasIcamaxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIcamaxBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIcamaxBatchedFortran = & hipblasIcamaxBatched(handle, n, x, incx, batch_count, result) return end function hipblasIcamaxBatchedFortran function hipblasIzamaxBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasIzamaxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIzamaxBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIzamaxBatchedFortran = & hipblasIzamaxBatched(handle, n, x, incx, batch_count, result) return end function hipblasIzamaxBatchedFortran ! 
amaxStridedBatched function hipblasIsamaxStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasIsamaxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIsamaxStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIsamaxStridedBatchedFortran = & hipblasIsamaxStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasIsamaxStridedBatchedFortran function hipblasIdamaxStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasIdamaxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIdamaxStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIdamaxStridedBatchedFortran = & hipblasIdamaxStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasIdamaxStridedBatchedFortran function hipblasIcamaxStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasIcamaxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIcamaxStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIcamaxStridedBatchedFortran = & hipblasIcamaxStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasIcamaxStridedBatchedFortran function hipblasIzamaxStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasIzamaxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIzamaxStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIzamaxStridedBatchedFortran = & hipblasIzamaxStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasIzamaxStridedBatchedFortran ! 
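    ! amin is the extension complementary to amax: it returns the 1-based
    ! index of the first element of x with the smallest absolute value
    ! (|Re| + |Im| for the complex types).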
amin function hipblasIsaminFortran(handle, n, x, incx, result) & bind(c, name='hipblasIsaminFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIsaminFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasIsaminFortran = & hipblasIsamin(handle, n, x, incx, result) return end function hipblasIsaminFortran function hipblasIdaminFortran(handle, n, x, incx, result) & bind(c, name='hipblasIdaminFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIdaminFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasIdaminFortran = & hipblasIdamin(handle, n, x, incx, result) return end function hipblasIdaminFortran function hipblasIcaminFortran(handle, n, x, incx, result) & bind(c, name='hipblasIcaminFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIcaminFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasIcaminFortran = & hipblasIcamin(handle, n, x, incx, result) return end function hipblasIcaminFortran function hipblasIzaminFortran(handle, n, x, incx, result) & bind(c, name='hipblasIzaminFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIzaminFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: result hipblasIzaminFortran = & hipblasIzamin(handle, n, x, incx, result) return end function hipblasIzaminFortran ! 
aminBatched function hipblasIsaminBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasIsaminBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIsaminBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIsaminBatchedFortran = & hipblasIsaminBatched(handle, n, x, incx, batch_count, result) return end function hipblasIsaminBatchedFortran function hipblasIdaminBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasIdaminBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIdaminBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIdaminBatchedFortran = & hipblasIdaminBatched(handle, n, x, incx, batch_count, result) return end function hipblasIdaminBatchedFortran function hipblasIcaminBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasIcaminBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIcaminBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIcaminBatchedFortran = & hipblasIcaminBatched(handle, n, x, incx, batch_count, result) return end function hipblasIcaminBatchedFortran function hipblasIzaminBatchedFortran(handle, n, x, incx, batch_count, result) & bind(c, name='hipblasIzaminBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIzaminBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIzaminBatchedFortran = & hipblasIzaminBatched(handle, n, x, incx, batch_count, result) return end function hipblasIzaminBatchedFortran ! 
aminStridedBatched function hipblasIsaminStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasIsaminStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIsaminStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIsaminStridedBatchedFortran = & hipblasIsaminStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasIsaminStridedBatchedFortran function hipblasIdaminStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasIdaminStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIdaminStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIdaminStridedBatchedFortran = & hipblasIdaminStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasIdaminStridedBatchedFortran function hipblasIcaminStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasIcaminStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIcaminStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIcaminStridedBatchedFortran = & hipblasIcaminStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasIcaminStridedBatchedFortran function hipblasIzaminStridedBatchedFortran(handle, n, x, incx, stride_x, batch_count, result) & bind(c, name='hipblasIzaminStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasIzaminStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count type(c_ptr), value :: result hipblasIzaminStridedBatchedFortran = & hipblasIzaminStridedBatched(handle, n, x, incx, stride_x, batch_count, result) return end function hipblasIzaminStridedBatchedFortran ! 
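    ! rot applies a plane (Givens) rotation to the vectors x and y:
    !     x(i) := c * x(i) + s * y(i)
    !     y(i) := c * y(i) - s * x(i)
    ! using the original value of x(i). Csrot / Zdrot take real c and s for
    ! complex vectors; Crot / Zrot take a real c and a complex s, and the
    ! conjugate of s is used when updating y.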
rot function hipblasSrotFortran(handle, n, x, incx, y, incy, c, s) & bind(c, name='hipblasSrotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s hipblasSrotFortran = & hipblasSrot(handle, n, x, incx, y, incy, c, s) return end function hipblasSrotFortran function hipblasDrotFortran(handle, n, x, incx, y, incy, c, s) & bind(c, name='hipblasDrotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s hipblasDrotFortran = & hipblasDrot(handle, n, x, incx, y, incy, c, s) return end function hipblasDrotFortran function hipblasCrotFortran(handle, n, x, incx, y, incy, c, s) & bind(c, name='hipblasCrotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCrotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s hipblasCrotFortran = & hipblasCrot(handle, n, x, incx, y, incy, c, s) return end function hipblasCrotFortran function hipblasCsrotFortran(handle, n, x, incx, y, incy, c, s) & bind(c, name='hipblasCsrotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsrotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s hipblasCsrotFortran = & hipblasCsrot(handle, n, x, incx, y, incy, c, s) return end function hipblasCsrotFortran function hipblasZrotFortran(handle, n, x, incx, y, incy, c, s) & bind(c, name='hipblasZrotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZrotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s hipblasZrotFortran = & hipblasZrot(handle, n, x, incx, y, incy, c, s) return end function hipblasZrotFortran function hipblasZdrotFortran(handle, n, x, incx, y, incy, c, s) & bind(c, name='hipblasZdrotFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdrotFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s hipblasZdrotFortran = & hipblasZdrot(handle, n, x, incx, y, incy, c, s) return end function hipblasZdrotFortran ! 
rotBatched function hipblasSrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count) & bind(c, name='hipblasSrotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasSrotBatchedFortran = & hipblasSrotBatched(handle, n, x, incx, y, incy, c, s, batch_count) return end function hipblasSrotBatchedFortran function hipblasDrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count) & bind(c, name='hipblasDrotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasDrotBatchedFortran = & hipblasDrotBatched(handle, n, x, incx, y, incy, c, s, batch_count) return end function hipblasDrotBatchedFortran function hipblasCrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count) & bind(c, name='hipblasCrotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCrotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasCrotBatchedFortran = & hipblasCrotBatched(handle, n, x, incx, y, incy, c, s, batch_count) return end function hipblasCrotBatchedFortran function hipblasCsrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count) & bind(c, name='hipblasCsrotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsrotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasCsrotBatchedFortran = & hipblasCsrotBatched(handle, n, x, incx, y, incy, c, s, batch_count) return end function hipblasCsrotBatchedFortran function hipblasZrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count) & bind(c, name='hipblasZrotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZrotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasZrotBatchedFortran = & hipblasZrotBatched(handle, n, x, incx, y, incy, c, s, batch_count) return end function hipblasZrotBatchedFortran function hipblasZdrotBatchedFortran(handle, n, x, incx, y, incy, c, s, batch_count) & bind(c, name='hipblasZdrotBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdrotBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx 
type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasZdrotBatchedFortran = & hipblasZdrotBatched(handle, n, x, incx, y, incy, c, s, batch_count) return end function hipblasZdrotBatchedFortran ! rotStridedBatched function hipblasSrotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) & bind(c, name='hipblasSrotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasSrotStridedBatchedFortran = & hipblasSrotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) return end function hipblasSrotStridedBatchedFortran function hipblasDrotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) & bind(c, name='hipblasDrotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasDrotStridedBatchedFortran = & hipblasDrotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) return end function hipblasDrotStridedBatchedFortran function hipblasCrotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) & bind(c, name='hipblasCrotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCrotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasCrotStridedBatchedFortran = & hipblasCrotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) return end function hipblasCrotStridedBatchedFortran function hipblasCsrotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) & bind(c, name='hipblasCsrotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsrotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasCsrotStridedBatchedFortran = & hipblasCsrotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) return end function hipblasCsrotStridedBatchedFortran function hipblasZrotStridedBatchedFortran(handle, n, x, incx, 
stride_x, y, incy, stride_y, c, s, batch_count) & bind(c, name='hipblasZrotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZrotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasZrotStridedBatchedFortran = & hipblasZrotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) return end function hipblasZrotStridedBatchedFortran function hipblasZdrotStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) & bind(c, name='hipblasZdrotStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdrotStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasZdrotStridedBatchedFortran = & hipblasZdrotStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, c, s, batch_count) return end function hipblasZdrotStridedBatchedFortran ! rotg function hipblasSrotgFortran(handle, a, b, c, s) & bind(c, name='hipblasSrotgFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotgFortran type(c_ptr), value :: handle type(c_ptr), value :: a type(c_ptr), value :: b type(c_ptr), value :: c type(c_ptr), value :: s hipblasSrotgFortran = & hipblasSrotg(handle, a, b, c, s) return end function hipblasSrotgFortran function hipblasDrotgFortran(handle, a, b, c, s) & bind(c, name='hipblasDrotgFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotgFortran type(c_ptr), value :: handle type(c_ptr), value :: a type(c_ptr), value :: b type(c_ptr), value :: c type(c_ptr), value :: s hipblasDrotgFortran = & hipblasDrotg(handle, a, b, c, s) return end function hipblasDrotgFortran function hipblasCrotgFortran(handle, a, b, c, s) & bind(c, name='hipblasCrotgFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCrotgFortran type(c_ptr), value :: handle type(c_ptr), value :: a type(c_ptr), value :: b type(c_ptr), value :: c type(c_ptr), value :: s hipblasCrotgFortran = & hipblasCrotg(handle, a, b, c, s) return end function hipblasCrotgFortran function hipblasZrotgFortran(handle, a, b, c, s) & bind(c, name='hipblasZrotgFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZrotgFortran type(c_ptr), value :: handle type(c_ptr), value :: a type(c_ptr), value :: b type(c_ptr), value :: c type(c_ptr), value :: s hipblasZrotgFortran = & hipblasZrotg(handle, a, b, c, s) return end function hipblasZrotgFortran ! 
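    ! The rotg family constructs the Givens rotation used by rot: given the
    ! scalars a and b, it computes c and s such that the rotation maps
    ! (a, b) to (r, 0), overwriting a with r and b with information from
    ! which the rotation can be reconstructed. In the batched forms that
    ! follow, a, b, c and s each refer to batch_count scalars.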
rotgBatched function hipblasSrotgBatchedFortran(handle, a, b, c, s, batch_count) & bind(c, name='hipblasSrotgBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotgBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: a type(c_ptr), value :: b type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasSrotgBatchedFortran = & hipblasSrotgBatched(handle, a, b, c, s, batch_count) return end function hipblasSrotgBatchedFortran function hipblasDrotgBatchedFortran(handle, a, b, c, s, batch_count) & bind(c, name='hipblasDrotgBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotgBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: a type(c_ptr), value :: b type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasDrotgBatchedFortran = & hipblasDrotgBatched(handle, a, b, c, s, batch_count) return end function hipblasDrotgBatchedFortran function hipblasCrotgBatchedFortran(handle, a, b, c, s, batch_count) & bind(c, name='hipblasCrotgBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCrotgBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: a type(c_ptr), value :: b type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasCrotgBatchedFortran = & hipblasCrotgBatched(handle, a, b, c, s, batch_count) return end function hipblasCrotgBatchedFortran function hipblasZrotgBatchedFortran(handle, a, b, c, s, batch_count) & bind(c, name='hipblasZrotgBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZrotgBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: a type(c_ptr), value :: b type(c_ptr), value :: c type(c_ptr), value :: s integer(c_int), value :: batch_count hipblasZrotgBatchedFortran = & hipblasZrotgBatched(handle, a, b, c, s, batch_count) return end function hipblasZrotgBatchedFortran ! 
rotgStridedBatched function hipblasSrotgStridedBatchedFortran(handle, a, stride_a, b, stride_b, c, stride_c, s, stride_s, batch_count) & bind(c, name='hipblasSrotgStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotgStridedBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: a integer(c_int64_t), value :: stride_a type(c_ptr), value :: b integer(c_int64_t), value :: stride_b type(c_ptr), value :: c integer(c_int64_t), value :: stride_c type(c_ptr), value :: s integer(c_int64_t), value :: stride_s integer(c_int), value :: batch_count hipblasSrotgStridedBatchedFortran = & hipblasSrotgStridedBatched(handle, a, stride_a, b, stride_b, c, stride_c, s, stride_s, batch_count) return end function hipblasSrotgStridedBatchedFortran function hipblasDrotgStridedBatchedFortran(handle, a, stride_a, b, stride_b, c, stride_c, s, stride_s, batch_count) & bind(c, name='hipblasDrotgStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotgStridedBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: a integer(c_int64_t), value :: stride_a type(c_ptr), value :: b integer(c_int64_t), value :: stride_b type(c_ptr), value :: c integer(c_int64_t), value :: stride_c type(c_ptr), value :: s integer(c_int64_t), value :: stride_s integer(c_int), value :: batch_count hipblasDrotgStridedBatchedFortran = & hipblasDrotgStridedBatched(handle, a, stride_a, b, stride_b, c, stride_c, s, stride_s, batch_count) return end function hipblasDrotgStridedBatchedFortran function hipblasCrotgStridedBatchedFortran(handle, a, stride_a, b, stride_b, c, stride_c, s, stride_s, batch_count) & bind(c, name='hipblasCrotgStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCrotgStridedBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: a integer(c_int64_t), value :: stride_a type(c_ptr), value :: b integer(c_int64_t), value :: stride_b type(c_ptr), value :: c integer(c_int64_t), value :: stride_c type(c_ptr), value :: s integer(c_int64_t), value :: stride_s integer(c_int), value :: batch_count hipblasCrotgStridedBatchedFortran = & hipblasCrotgStridedBatched(handle, a, stride_a, b, stride_b, c, stride_c, s, stride_s, batch_count) return end function hipblasCrotgStridedBatchedFortran function hipblasZrotgStridedBatchedFortran(handle, a, stride_a, b, stride_b, c, stride_c, s, stride_s, batch_count) & bind(c, name='hipblasZrotgStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZrotgStridedBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: a integer(c_int64_t), value :: stride_a type(c_ptr), value :: b integer(c_int64_t), value :: stride_b type(c_ptr), value :: c integer(c_int64_t), value :: stride_c type(c_ptr), value :: s integer(c_int64_t), value :: stride_s integer(c_int), value :: batch_count hipblasZrotgStridedBatchedFortran = & hipblasZrotgStridedBatched(handle, a, stride_a, b, stride_b, c, stride_c, s, stride_s, batch_count) return end function hipblasZrotgStridedBatchedFortran ! 
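    ! Batched vs. strided-batched convention used throughout this file: the
    ! *Batched wrappers take arrays of pointers (one pointer per problem in the
    ! batch), while the *StridedBatched wrappers take a single pointer per argument
    ! plus an int64 stride, counted in elements, between consecutive problems;
    ! batch_count gives the number of problems in the batch.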
rotm function hipblasSrotmFortran(handle, n, x, incx, y, incy, param) & bind(c, name='hipblasSrotmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotmFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: param hipblasSrotmFortran = & hipblasSrotm(handle, n, x, incx, y, incy, param) return end function hipblasSrotmFortran function hipblasDrotmFortran(handle, n, x, incx, y, incy, param) & bind(c, name='hipblasDrotmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotmFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: param hipblasDrotmFortran = & hipblasDrotm(handle, n, x, incx, y, incy, param) return end function hipblasDrotmFortran ! rotmBatched function hipblasSrotmBatchedFortran(handle, n, x, incx, y, incy, param, batch_count) & bind(c, name='hipblasSrotmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotmBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: param integer(c_int), value :: batch_count hipblasSrotmBatchedFortran = & hipblasSrotmBatched(handle, n, x, incx, y, incy, param, batch_count) return end function hipblasSrotmBatchedFortran function hipblasDrotmBatchedFortran(handle, n, x, incx, y, incy, param, batch_count) & bind(c, name='hipblasDrotmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotmBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: param integer(c_int), value :: batch_count hipblasDrotmBatchedFortran = & hipblasDrotmBatched(handle, n, x, incx, y, incy, param, batch_count) return end function hipblasDrotmBatchedFortran ! 
rotmStridedBatched function hipblasSrotmStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, param, & stride_param, batch_count) & bind(c, name='hipblasSrotmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotmStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: param integer(c_int64_t), value :: stride_param integer(c_int), value :: batch_count hipblasSrotmStridedBatchedFortran = & hipblasSrotmStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, param, & stride_param, batch_count) return end function hipblasSrotmStridedBatchedFortran function hipblasDrotmStridedBatchedFortran(handle, n, x, incx, stride_x, y, incy, stride_y, param, & stride_param, batch_count) & bind(c, name='hipblasDrotmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotmStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: param integer(c_int64_t), value :: stride_param integer(c_int), value :: batch_count hipblasDrotmStridedBatchedFortran = & hipblasDrotmStridedBatched(handle, n, x, incx, stride_x, y, incy, stride_y, param, & stride_param, batch_count) return end function hipblasDrotmStridedBatchedFortran ! rotmg function hipblasSrotmgFortran(handle, d1, d2, x1, y1, param) & bind(c, name='hipblasSrotmgFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotmgFortran type(c_ptr), value :: handle type(c_ptr), value :: d1 type(c_ptr), value :: d2 type(c_ptr), value :: x1 type(c_ptr), value :: y1 type(c_ptr), value :: param hipblasSrotmgFortran = & hipblasSrotmg(handle, d1, d2, x1, y1, param) return end function hipblasSrotmgFortran function hipblasDrotmgFortran(handle, d1, d2, x1, y1, param) & bind(c, name='hipblasDrotmgFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotmgFortran type(c_ptr), value :: handle type(c_ptr), value :: d1 type(c_ptr), value :: d2 type(c_ptr), value :: x1 type(c_ptr), value :: y1 type(c_ptr), value :: param hipblasDrotmgFortran = & hipblasDrotmg(handle, d1, d2, x1, y1, param) return end function hipblasDrotmgFortran ! 
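    ! The rotm/rotmg wrappers follow the modified-Givens convention of reference
    ! BLAS: param is a 5-element array whose first entry is the flag (-1, 0, 1 or
    ! -2) and whose remaining entries hold the elements of the 2x2 matrix H; rotmg
    ! additionally updates the scaling factors d1 and d2 and the coordinate x1.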
rotmgBatched function hipblasSrotmgBatchedFortran(handle, d1, d2, x1, y1, param, batch_count) & bind(c, name='hipblasSrotmgBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotmgBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: d1 type(c_ptr), value :: d2 type(c_ptr), value :: x1 type(c_ptr), value :: y1 type(c_ptr), value :: param integer(c_int), value :: batch_count hipblasSrotmgBatchedFortran = & hipblasSrotmgBatched(handle, d1, d2, x1, y1, param, batch_count) return end function hipblasSrotmgBatchedFortran function hipblasDrotmgBatchedFortran(handle, d1, d2, x1, y1, param, batch_count) & bind(c, name='hipblasDrotmgBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotmgBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: d1 type(c_ptr), value :: d2 type(c_ptr), value :: x1 type(c_ptr), value :: y1 type(c_ptr), value :: param integer(c_int), value :: batch_count hipblasDrotmgBatchedFortran = & hipblasDrotmgBatched(handle, d1, d2, x1, y1, param, batch_count) return end function hipblasDrotmgBatchedFortran ! rotmgStridedBatched function hipblasSrotmgStridedBatchedFortran(handle, d1, stride_d1, d2, stride_d2, x1, stride_x1, & y1, stride_y1, param, stride_param, batch_count) & bind(c, name='hipblasSrotmgStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSrotmgStridedBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: d1 integer(c_int64_t), value :: stride_d1 type(c_ptr), value :: d2 integer(c_int64_t), value :: stride_d2 type(c_ptr), value :: x1 integer(c_int64_t), value :: stride_x1 type(c_ptr), value :: y1 integer(c_int64_t), value :: stride_y1 type(c_ptr), value :: param integer(c_int64_t), value :: stride_param integer(c_int), value :: batch_count hipblasSrotmgStridedBatchedFortran = & hipblasSrotmgStridedBatched(handle, d1, stride_d1, d2, stride_d2, x1, stride_x1, y1, stride_y1, & param, stride_param, batch_count) return end function hipblasSrotmgStridedBatchedFortran function hipblasDrotmgStridedBatchedFortran(handle, d1, stride_d1, d2, stride_d2, x1, stride_x1, & y1, stride_y1, param, stride_param, batch_count) & bind(c, name='hipblasDrotmgStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDrotmgStridedBatchedFortran type(c_ptr), value :: handle type(c_ptr), value :: d1 integer(c_int64_t), value :: stride_d1 type(c_ptr), value :: d2 integer(c_int64_t), value :: stride_d2 type(c_ptr), value :: x1 integer(c_int64_t), value :: stride_x1 type(c_ptr), value :: y1 integer(c_int64_t), value :: stride_y1 type(c_ptr), value :: param integer(c_int64_t), value :: stride_param integer(c_int), value :: batch_count hipblasDrotmgStridedBatchedFortran = & hipblasDrotmgStridedBatched(handle, d1, stride_d1, d2, stride_d2, x1, stride_x1, y1, stride_y1, & param, stride_param, batch_count) return end function hipblasDrotmgStridedBatchedFortran !--------! ! blas 2 ! !--------! ! 
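    ! Conventions shared by the Level-2 wrappers that follow: matrices are stored
    ! column-major with leading dimension lda; trans takes HIPBLAS_OP_N,
    ! HIPBLAS_OP_T or HIPBLAS_OP_C; alpha and beta are passed as type(c_ptr) and
    ! are dereferenced according to the handle's pointer mode; incx/incy are the
    ! strides between successive elements of the vectors x and y.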
gbmv function hipblasSgbmvFortran(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy) & bind(c, name='hipblasSgbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasSgbmvFortran = & hipblasSgbmv(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy) end function hipblasSgbmvFortran function hipblasDgbmvFortran(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy) & bind(c, name='hipblasDgbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasDgbmvFortran = & hipblasDgbmv(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy) end function hipblasDgbmvFortran function hipblasCgbmvFortran(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy) & bind(c, name='hipblasCgbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasCgbmvFortran = & hipblasCgbmv(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy) end function hipblasCgbmvFortran function hipblasZgbmvFortran(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy) & bind(c, name='hipblasZgbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasZgbmvFortran = & hipblasZgbmv(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy) end function hipblasZgbmvFortran ! 
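    ! gbmv computes y := alpha*op(A)*x + beta*y for an m-by-n band matrix with kl
    ! sub-diagonals and ku super-diagonals.  A is expected in BLAS band storage,
    ! i.e. element a(i,j) is stored in A(ku+1+i-j, j), which requires
    ! lda >= kl + ku + 1.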
gbmvBatched function hipblasSgbmvBatchedFortran(handle, trans, m, n, kl, ku, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasSgbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasSgbmvBatchedFortran = & hipblasSgbmvBatched(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, & beta, y, incy, batch_count) end function hipblasSgbmvBatchedFortran function hipblasDgbmvBatchedFortran(handle, trans, m, n, kl, ku, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasDgbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasDgbmvBatchedFortran = & hipblasDgbmvBatched(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, & beta, y, incy, batch_count) end function hipblasDgbmvBatchedFortran function hipblasCgbmvBatchedFortran(handle, trans, m, n, kl, ku, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasCgbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasCgbmvBatchedFortran = & hipblasCgbmvBatched(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, & beta, y, incy, batch_count) end function hipblasCgbmvBatchedFortran function hipblasZgbmvBatchedFortran(handle, trans, m, n, kl, ku, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasZgbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZgbmvBatchedFortran = & hipblasZgbmvBatched(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, & beta, y, incy, batch_count) end function 
hipblasZgbmvBatchedFortran ! gbmvStridedBatched function hipblasSgbmvStridedBatchedFortran(handle, trans, m, n, kl, ku, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasSgbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasSgbmvStridedBatchedFortran = & hipblasSgbmvStridedBatched(handle, trans, m, n, kl, ku, alpha, A, lda, stride_A, x, incx, stride_x, & beta, y, incy, stride_y, batch_count) end function hipblasSgbmvStridedBatchedFortran function hipblasDgbmvStridedBatchedFortran(handle, trans, m, n, kl, ku, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasDgbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasDgbmvStridedBatchedFortran = & hipblasDgbmvStridedBatched(handle, trans, m, n, kl, ku, alpha, A, lda, stride_A, x, incx, stride_x, & beta, y, incy, stride_y, batch_count) end function hipblasDgbmvStridedBatchedFortran function hipblasCgbmvStridedBatchedFortran(handle, trans, m, n, kl, ku, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasCgbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasCgbmvStridedBatchedFortran = & hipblasCgbmvStridedBatched(handle, trans, m, n, kl, ku, alpha, A, lda, stride_A, x, incx, stride_x, & beta, y, incy, stride_y, batch_count) end function hipblasCgbmvStridedBatchedFortran function hipblasZgbmvStridedBatchedFortran(handle, trans, m, n, kl, ku, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, 
name='hipblasZgbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: kl integer(c_int), value :: ku type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasZgbmvStridedBatchedFortran = & hipblasZgbmvStridedBatched(handle, trans, m, n, kl, ku, alpha, A, lda, stride_A, x, incx, stride_x, & beta, y, incy, stride_y, batch_count) end function hipblasZgbmvStridedBatchedFortran ! gemv function hipblasSgemvFortran(handle, trans, m, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasSgemvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgemvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasSgemvFortran = & hipblasSgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy) end function hipblasSgemvFortran function hipblasDgemvFortran(handle, trans, m, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasDgemvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgemvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasDgemvFortran = & hipblasDgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy) end function hipblasDgemvFortran function hipblasCgemvFortran(handle, trans, m, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasCgemvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgemvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasCgemvFortran = & hipblasCgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy) end function hipblasCgemvFortran function hipblasZgemvFortran(handle, trans, m, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasZgemvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgemvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value 
:: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasZgemvFortran = & hipblasZgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy) end function hipblasZgemvFortran ! gemvBatched function hipblasSgemvBatchedFortran(handle, trans, m, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasSgemvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgemvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasSgemvBatchedFortran = & hipblasSgemvBatched(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count) end function hipblasSgemvBatchedFortran function hipblasDgemvBatchedFortran(handle, trans, m, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasDgemvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgemvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasDgemvBatchedFortran = & hipblasDgemvBatched(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count) end function hipblasDgemvBatchedFortran function hipblasCgemvBatchedFortran(handle, trans, m, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasCgemvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgemvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasCgemvBatchedFortran = & hipblasCgemvBatched(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count) end function hipblasCgemvBatchedFortran function hipblasZgemvBatchedFortran(handle, trans, m, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasZgemvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgemvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZgemvBatchedFortran = & hipblasZgemvBatched(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count) end function hipblasZgemvBatchedFortran ! 
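    ! Illustrative gemv sketch (comments only, not part of the bindings): computes
    ! y := alpha*A*x + beta*y through the Sgemv wrapper above.  Here dA, dx and dy
    ! are assumed to be device buffers (type(c_ptr)) allocated elsewhere, alpha and
    ! beta host scalars under the default host pointer mode, and m, n, lda and the
    ! status variable set up by the caller.
    !     status = hipblasSgemvFortran(handle, HIPBLAS_OP_N, m, n, c_loc(alpha), &
    !                                  dA, lda, dx, 1, c_loc(beta), dy, 1)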
gemvStridedBatched function hipblasSgemvStridedBatchedFortran(handle, trans, m, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasSgemvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgemvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasSgemvStridedBatchedFortran = & hipblasSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasSgemvStridedBatchedFortran function hipblasDgemvStridedBatchedFortran(handle, trans, m, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasDgemvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgemvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasDgemvStridedBatchedFortran = & hipblasDgemvStridedBatched(handle, trans, m, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasDgemvStridedBatchedFortran function hipblasCgemvStridedBatchedFortran(handle, trans, m, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasCgemvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgemvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasCgemvStridedBatchedFortran = & hipblasCgemvStridedBatched(handle, trans, m, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasCgemvStridedBatchedFortran function hipblasZgemvStridedBatchedFortran(handle, trans, m, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasZgemvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgemvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m 
integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasZgemvStridedBatchedFortran = & hipblasZgemvStridedBatched(handle, trans, m, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasZgemvStridedBatchedFortran ! hbmv function hipblasChbmvFortran(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasChbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasChbmvFortran = & hipblasChbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy) end function hipblasChbmvFortran function hipblasZhbmvFortran(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasZhbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasZhbmvFortran = & hipblasZhbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy) end function hipblasZhbmvFortran ! 
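    ! hbmv is the Hermitian banded matrix-vector product (complex types only): k is
    ! the number of super-diagonals of the triangle selected by uplo, A uses band
    ! storage with lda >= k + 1, and the imaginary parts of the diagonal elements
    ! are assumed to be zero.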
hbmvBatched function hipblasChbmvBatchedFortran(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasChbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasChbmvBatchedFortran = & hipblasChbmvBatched(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy, batch_count) end function hipblasChbmvBatchedFortran function hipblasZhbmvBatchedFortran(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasZhbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZhbmvBatchedFortran = & hipblasZhbmvBatched(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy, batch_count) end function hipblasZhbmvBatchedFortran ! hbmvStridedBatched function hipblasChbmvStridedBatchedFortran(handle, uplo, n, k, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasChbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasChbmvStridedBatchedFortran = & hipblasChbmvStridedBatched(handle, uplo, n, k, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasChbmvStridedBatchedFortran function hipblasZhbmvStridedBatchedFortran(handle, uplo, n, k, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasZhbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: 
batch_count hipblasZhbmvStridedBatchedFortran = & hipblasZhbmvStridedBatched(handle, uplo, n, k, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasZhbmvStridedBatchedFortran ! hemv function hipblasChemvFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasChemvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChemvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasChemvFortran = & hipblasChemv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy) end function hipblasChemvFortran function hipblasZhemvFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasZhemvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhemvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasZhemvFortran = & hipblasZhemv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy) end function hipblasZhemvFortran ! hemvBatched function hipblasChemvBatchedFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasChemvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChemvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasChemvBatchedFortran = & hipblasChemvBatched(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) end function hipblasChemvBatchedFortran function hipblasZhemvBatchedFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasZhemvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhemvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZhemvBatchedFortran = & hipblasZhemvBatched(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) end function hipblasZhemvBatchedFortran ! 
hemvStridedBatched function hipblasChemvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasChemvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChemvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasChemvStridedBatchedFortran = & hipblasChemvStridedBatched(handle, uplo, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasChemvStridedBatchedFortran function hipblasZhemvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasZhemvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhemvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasZhemvStridedBatchedFortran = & hipblasZhemvStridedBatched(handle, uplo, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasZhemvStridedBatchedFortran ! her function hipblasCherFortran(handle, uplo, n, alpha, & x, incx, A, lda) & bind(c, name='hipblasCherFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda hipblasCherFortran = & hipblasCher(handle, uplo, n, alpha, x, incx, A, lda) end function hipblasCherFortran function hipblasZherFortran(handle, uplo, n, alpha, & x, incx, A, lda) & bind(c, name='hipblasZherFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda hipblasZherFortran = & hipblasZher(handle, uplo, n, alpha, x, incx, A, lda) end function hipblasZherFortran ! 
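    ! her performs the Hermitian rank-1 update A := alpha*x*x**H + A; alpha is a
    ! real scalar (single precision for Cher, double precision for Zher) even
    ! though A and x are complex, and only the triangle selected by uplo is
    ! referenced and updated.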
herBatched function hipblasCherBatchedFortran(handle, uplo, n, alpha, & x, incx, A, lda, batch_count) & bind(c, name='hipblasCherBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasCherBatchedFortran = & hipblasCherBatched(handle, uplo, n, alpha, x, incx, A, lda, batch_count) end function hipblasCherBatchedFortran function hipblasZherBatchedFortran(handle, uplo, n, alpha, & x, incx, A, lda, batch_count) & bind(c, name='hipblasZherBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasZherBatchedFortran = & hipblasZherBatched(handle, uplo, n, alpha, x, incx, A, lda, batch_count) end function hipblasZherBatchedFortran ! herStridedBatched function hipblasCherStridedBatchedFortran(handle, uplo, n, alpha, & x, incx, stride_x, A, lda, stride_A, batch_count) & bind(c, name='hipblasCherStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasCherStridedBatchedFortran = & hipblasCherStridedBatched(handle, uplo, n, alpha, x, incx, stride_x, & A, lda, stride_A, batch_count) end function hipblasCherStridedBatchedFortran function hipblasZherStridedBatchedFortran(handle, uplo, n, alpha, & x, incx, stride_x, A, lda, stride_A, batch_count) & bind(c, name='hipblasZherStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasZherStridedBatchedFortran = & hipblasZherStridedBatched(handle, uplo, n, alpha, x, incx, stride_x, & A, lda, stride_A, batch_count) end function hipblasZherStridedBatchedFortran ! 
her2 function hipblasCher2Fortran(handle, uplo, n, alpha, & x, incx, y, incy, A, lda) & bind(c, name='hipblasCher2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCher2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasCher2Fortran = & hipblasCher2(handle, uplo, n, alpha, x, incx, & y, incy, A, lda) end function hipblasCher2Fortran function hipblasZher2Fortran(handle, uplo, n, alpha, & x, incx, y, incy, A, lda) & bind(c, name='hipblasZher2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZher2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasZher2Fortran = & hipblasZher2(handle, uplo, n, alpha, x, incx, & y, incy, A, lda) end function hipblasZher2Fortran ! her2Batched function hipblasCher2BatchedFortran(handle, uplo, n, alpha, & x, incx, y, incy, A, lda, batch_count) & bind(c, name='hipblasCher2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCher2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasCher2BatchedFortran = & hipblasCher2Batched(handle, uplo, n, alpha, x, incx, & y, incy, A, lda, batch_count) end function hipblasCher2BatchedFortran function hipblasZher2BatchedFortran(handle, uplo, n, alpha, & x, incx, y, incy, A, lda, batch_count) & bind(c, name='hipblasZher2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZher2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasZher2BatchedFortran = & hipblasZher2Batched(handle, uplo, n, alpha, x, incx, & y, incy, A, lda, batch_count) end function hipblasZher2BatchedFortran ! 
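    ! her2 performs the Hermitian rank-2 update
    ! A := alpha*x*y**H + conj(alpha)*y*x**H + A, where alpha is complex; as with
    ! her, only the triangle selected by uplo is referenced and updated.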
her2StridedBatched function hipblasCher2StridedBatchedFortran(handle, uplo, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasCher2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCher2StridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasCher2StridedBatchedFortran = & hipblasCher2StridedBatched(handle, uplo, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasCher2StridedBatchedFortran function hipblasZher2StridedBatchedFortran(handle, uplo, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasZher2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZher2StridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasZher2StridedBatchedFortran = & hipblasZher2StridedBatched(handle, uplo, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasZher2StridedBatchedFortran ! hpmv function hipblasChpmvFortran(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy) & bind(c, name='hipblasChpmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChpmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasChpmvFortran = & hipblasChpmv(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy) end function hipblasChpmvFortran function hipblasZhpmvFortran(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy) & bind(c, name='hipblasZhpmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhpmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasZhpmvFortran = & hipblasZhpmv(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy) end function hipblasZhpmvFortran ! 
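    ! hpmv operates on a Hermitian matrix in packed storage: AP holds the triangle
    ! selected by uplo packed column by column in a linear array of n*(n+1)/2
    ! elements, so there is no leading-dimension argument.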
hpmvBatched function hipblasChpmvBatchedFortran(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasChpmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChpmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasChpmvBatchedFortran = & hipblasChpmvBatched(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy, batch_count) end function hipblasChpmvBatchedFortran function hipblasZhpmvBatchedFortran(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasZhpmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhpmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZhpmvBatchedFortran = & hipblasZhpmvBatched(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy, batch_count) end function hipblasZhpmvBatchedFortran ! hpmvStridedBatched function hipblasChpmvStridedBatchedFortran(handle, uplo, n, alpha, AP, stride_AP, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasChpmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChpmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasChpmvStridedBatchedFortran = & hipblasChpmvStridedBatched(handle, uplo, n, alpha, AP, stride_AP, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasChpmvStridedBatchedFortran function hipblasZhpmvStridedBatchedFortran(handle, uplo, n, alpha, AP, stride_AP, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasZhpmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhpmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasZhpmvStridedBatchedFortran = & hipblasZhpmvStridedBatched(handle, uplo, n, alpha, AP, stride_AP, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasZhpmvStridedBatchedFortran ! 
hpr function hipblasChprFortran(handle, uplo, n, alpha, & x, incx, AP) & bind(c, name='hipblasChprFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChprFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP hipblasChprFortran = & hipblasChpr(handle, uplo, n, alpha, x, incx, AP) end function hipblasChprFortran function hipblasZhprFortran(handle, uplo, n, alpha, & x, incx, AP) & bind(c, name='hipblasZhprFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhprFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP hipblasZhprFortran = & hipblasZhpr(handle, uplo, n, alpha, x, incx, AP) end function hipblasZhprFortran ! hprBatched function hipblasChprBatchedFortran(handle, uplo, n, alpha, & x, incx, AP, batch_count) & bind(c, name='hipblasChprBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChprBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasChprBatchedFortran = & hipblasChprBatched(handle, uplo, n, alpha, x, incx, AP, batch_count) end function hipblasChprBatchedFortran function hipblasZhprBatchedFortran(handle, uplo, n, alpha, & x, incx, AP, batch_count) & bind(c, name='hipblasZhprBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhprBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasZhprBatchedFortran = & hipblasZhprBatched(handle, uplo, n, alpha, x, incx, AP, batch_count) end function hipblasZhprBatchedFortran ! 
hprStridedBatched function hipblasChprStridedBatchedFortran(handle, uplo, n, alpha, & x, incx, stride_x, AP, stride_AP, batch_count) & bind(c, name='hipblasChprStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChprStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP integer(c_int), value :: batch_count hipblasChprStridedBatchedFortran = & hipblasChprStridedBatched(handle, uplo, n, alpha, x, incx, stride_x, & AP, stride_AP, batch_count) end function hipblasChprStridedBatchedFortran function hipblasZhprStridedBatchedFortran(handle, uplo, n, alpha, & x, incx, stride_x, AP, stride_AP, batch_count) & bind(c, name='hipblasZhprStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhprStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP integer(c_int), value :: batch_count hipblasZhprStridedBatchedFortran = & hipblasZhprStridedBatched(handle, uplo, n, alpha, x, incx, stride_x, & AP, stride_AP, batch_count) end function hipblasZhprStridedBatchedFortran ! hpr2 function hipblasChpr2Fortran(handle, uplo, n, alpha, & x, incx, y, incy, AP) & bind(c, name='hipblasChpr2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChpr2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: AP hipblasChpr2Fortran = & hipblasChpr2(handle, uplo, n, alpha, x, incx, & y, incy, AP) end function hipblasChpr2Fortran function hipblasZhpr2Fortran(handle, uplo, n, alpha, & x, incx, y, incy, AP) & bind(c, name='hipblasZhpr2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhpr2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: AP hipblasZhpr2Fortran = & hipblasZhpr2(handle, uplo, n, alpha, x, incx, & y, incy, AP) end function hipblasZhpr2Fortran ! 
hpr2Batched function hipblasChpr2BatchedFortran(handle, uplo, n, alpha, & x, incx, y, incy, AP, batch_count) & bind(c, name='hipblasChpr2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChpr2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasChpr2BatchedFortran = & hipblasChpr2Batched(handle, uplo, n, alpha, x, incx, & y, incy, AP, batch_count) end function hipblasChpr2BatchedFortran function hipblasZhpr2BatchedFortran(handle, uplo, n, alpha, & x, incx, y, incy, AP, batch_count) & bind(c, name='hipblasZhpr2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhpr2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasZhpr2BatchedFortran = & hipblasZhpr2Batched(handle, uplo, n, alpha, x, incx, & y, incy, AP, batch_count) end function hipblasZhpr2BatchedFortran ! hpr2StridedBatched function hipblasChpr2StridedBatchedFortran(handle, uplo, n, alpha, & x, incx, stride_x, y, incy, stride_y, AP, stride_AP, batch_count) & bind(c, name='hipblasChpr2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChpr2StridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP integer(c_int), value :: batch_count hipblasChpr2StridedBatchedFortran = & hipblasChpr2StridedBatched(handle, uplo, n, alpha, x, incx, stride_x, & y, incy, stride_y, AP, stride_AP, batch_count) end function hipblasChpr2StridedBatchedFortran function hipblasZhpr2StridedBatchedFortran(handle, uplo, n, alpha, & x, incx, stride_x, y, incy, stride_y, AP, stride_AP, batch_count) & bind(c, name='hipblasZhpr2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhpr2StridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP integer(c_int), value :: batch_count hipblasZhpr2StridedBatchedFortran = & hipblasZhpr2StridedBatched(handle, uplo, n, alpha, x, incx, stride_x, & y, incy, stride_y, AP, stride_AP, batch_count) end function hipblasZhpr2StridedBatchedFortran ! 
trmv function hipblasStrmvFortran(handle, uplo, transA, diag, m, & A, lda, x, incx) & bind(c, name='hipblasStrmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasStrmvFortran = & hipblasStrmv(handle, uplo, transA, diag, m, & A, lda, x, incx) end function hipblasStrmvFortran function hipblasDtrmvFortran(handle, uplo, transA, diag, m, & A, lda, x, incx) & bind(c, name='hipblasDtrmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasDtrmvFortran = & hipblasDtrmv(handle, uplo, transA, diag, m, & A, lda, x, incx) end function hipblasDtrmvFortran function hipblasCtrmvFortran(handle, uplo, transA, diag, m, & A, lda, x, incx) & bind(c, name='hipblasCtrmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasCtrmvFortran = & hipblasCtrmv(handle, uplo, transA, diag, m, & A, lda, x, incx) end function hipblasCtrmvFortran function hipblasZtrmvFortran(handle, uplo, transA, diag, m, & A, lda, x, incx) & bind(c, name='hipblasZtrmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasZtrmvFortran = & hipblasZtrmv(handle, uplo, transA, diag, m, & A, lda, x, incx) end function hipblasZtrmvFortran ! 
trmvBatched function hipblasStrmvBatchedFortran(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) & bind(c, name='hipblasStrmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasStrmvBatchedFortran = & hipblasStrmvBatched(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) end function hipblasStrmvBatchedFortran function hipblasDtrmvBatchedFortran(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) & bind(c, name='hipblasDtrmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasDtrmvBatchedFortran = & hipblasDtrmvBatched(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) end function hipblasDtrmvBatchedFortran function hipblasCtrmvBatchedFortran(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) & bind(c, name='hipblasCtrmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasCtrmvBatchedFortran = & hipblasCtrmvBatched(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) end function hipblasCtrmvBatchedFortran function hipblasZtrmvBatchedFortran(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) & bind(c, name='hipblasZtrmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasZtrmvBatchedFortran = & hipblasZtrmvBatched(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) end function hipblasZtrmvBatchedFortran ! 
    ! trmvStridedBatched
    function hipblasStrmvStridedBatchedFortran(handle, uplo, transA, diag, m, &
            A, lda, stride_A, x, incx, stride_x, batch_count) &
            bind(c, name='hipblasStrmvStridedBatchedFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrmvStridedBatchedFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(kind(HIPBLAS_OP_N)), value :: transA
        integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag
        integer(c_int), value :: m
        type(c_ptr), value :: A
        integer(c_int), value :: lda
        integer(c_int64_t), value :: stride_A
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        integer(c_int64_t), value :: stride_x
        integer(c_int), value :: batch_count
        hipblasStrmvStridedBatchedFortran = &
            hipblasStrmvStridedBatched(handle, uplo, transA, diag, m, &
                                       A, lda, stride_A, x, incx, stride_x, batch_count)
    end function hipblasStrmvStridedBatchedFortran

    function hipblasDtrmvStridedBatchedFortran(handle, uplo, transA, diag, m, &
            A, lda, stride_A, x, incx, stride_x, batch_count) &
            bind(c, name='hipblasDtrmvStridedBatchedFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrmvStridedBatchedFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(kind(HIPBLAS_OP_N)), value :: transA
        integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag
        integer(c_int), value :: m
        type(c_ptr), value :: A
        integer(c_int), value :: lda
        integer(c_int64_t), value :: stride_A
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        integer(c_int64_t), value :: stride_x
        integer(c_int), value :: batch_count
        hipblasDtrmvStridedBatchedFortran = &
            hipblasDtrmvStridedBatched(handle, uplo, transA, diag, m, &
                                       A, lda, stride_A, x, incx, stride_x, batch_count)
    end function hipblasDtrmvStridedBatchedFortran

    function hipblasCtrmvStridedBatchedFortran(handle, uplo, transA, diag, m, &
            A, lda, stride_A, x, incx, stride_x, batch_count) &
            bind(c, name='hipblasCtrmvStridedBatchedFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrmvStridedBatchedFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(kind(HIPBLAS_OP_N)), value :: transA
        integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag
        integer(c_int), value :: m
        type(c_ptr), value :: A
        integer(c_int), value :: lda
        integer(c_int64_t), value :: stride_A
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        integer(c_int64_t), value :: stride_x
        integer(c_int), value :: batch_count
        hipblasCtrmvStridedBatchedFortran = &
            hipblasCtrmvStridedBatched(handle, uplo, transA, diag, m, &
                                       A, lda, stride_A, x, incx, stride_x, batch_count)
    end function hipblasCtrmvStridedBatchedFortran

    function hipblasZtrmvStridedBatchedFortran(handle, uplo, transA, diag, m, &
            A, lda, stride_A, x, incx, stride_x, batch_count) &
            bind(c, name='hipblasZtrmvStridedBatchedFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrmvStridedBatchedFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(kind(HIPBLAS_OP_N)), value :: transA
        integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag
        integer(c_int), value :: m
        type(c_ptr), value :: A
        integer(c_int), value :: lda
        integer(c_int64_t), value :: stride_A
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        integer(c_int64_t), value :: stride_x
        integer(c_int), value :: batch_count
        hipblasZtrmvStridedBatchedFortran = &
            hipblasZtrmvStridedBatched(handle, uplo, transA, diag, m, &
                                       A, lda, stride_A, x, incx, stride_x, batch_count)
    end function hipblasZtrmvStridedBatchedFortran

    ! tpmv
    function hipblasStpmvFortran(handle, uplo, transA, diag, m, &
            AP, x, incx) &
            bind(c, name='hipblasStpmvFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStpmvFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(kind(HIPBLAS_OP_N)), value :: transA
        integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag
        integer(c_int), value :: m
        type(c_ptr), value :: AP
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        hipblasStpmvFortran = &
            hipblasStpmv(handle, uplo, transA, diag, m, &
                         AP, x, incx)
    end function hipblasStpmvFortran

    function hipblasDtpmvFortran(handle, uplo, transA, diag, m, &
            AP, x, incx) &
            bind(c, name='hipblasDtpmvFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtpmvFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(kind(HIPBLAS_OP_N)), value :: transA
        integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag
        integer(c_int), value :: m
        type(c_ptr), value :: AP
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        hipblasDtpmvFortran = &
            hipblasDtpmv(handle, uplo, transA, diag, m, &
                         AP, x, incx)
    end function hipblasDtpmvFortran

    function hipblasCtpmvFortran(handle, uplo, transA, diag, m, &
            AP, x, incx) &
            bind(c, name='hipblasCtpmvFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtpmvFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(kind(HIPBLAS_OP_N)), value :: transA
        integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag
        integer(c_int), value :: m
        type(c_ptr), value :: AP
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        hipblasCtpmvFortran = &
            hipblasCtpmv(handle, uplo, transA, diag, m, &
                         AP, x, incx)
    end function hipblasCtpmvFortran

    function hipblasZtpmvFortran(handle, uplo, transA, diag, m, &
            AP, x, incx) &
            bind(c, name='hipblasZtpmvFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtpmvFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(kind(HIPBLAS_OP_N)), value :: transA
        integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag
        integer(c_int), value :: m
        type(c_ptr), value :: AP
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        hipblasZtpmvFortran = &
            hipblasZtpmv(handle, uplo, transA, diag, m, &
                         AP, x, incx)
    end function hipblasZtpmvFortran
    !
tpmvBatched function hipblasStpmvBatchedFortran(handle, uplo, transA, diag, m, & AP, x, incx, batch_count) & bind(c, name='hipblasStpmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStpmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasStpmvBatchedFortran = & hipblasStpmvBatched(handle, uplo, transA, diag, m, & AP, x, incx, batch_count) end function hipblasStpmvBatchedFortran function hipblasDtpmvBatchedFortran(handle, uplo, transA, diag, m, & AP, x, incx, batch_count) & bind(c, name='hipblasDtpmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtpmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasDtpmvBatchedFortran = & hipblasDtpmvBatched(handle, uplo, transA, diag, m, & AP, x, incx, batch_count) end function hipblasDtpmvBatchedFortran function hipblasCtpmvBatchedFortran(handle, uplo, transA, diag, m, & AP, x, incx, batch_count) & bind(c, name='hipblasCtpmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtpmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasCtpmvBatchedFortran = & hipblasCtpmvBatched(handle, uplo, transA, diag, m, & AP, x, incx, batch_count) end function hipblasCtpmvBatchedFortran function hipblasZtpmvBatchedFortran(handle, uplo, transA, diag, m, & AP, x, incx, batch_count) & bind(c, name='hipblasZtpmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtpmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasZtpmvBatchedFortran = & hipblasZtpmvBatched(handle, uplo, transA, diag, m, & AP, x, incx, batch_count) end function hipblasZtpmvBatchedFortran ! 
tpmvStridedBatched function hipblasStpmvStridedBatchedFortran(handle, uplo, transA, diag, m, & AP, stride_AP, x, incx, stride_x, batch_count) & bind(c, name='hipblasStpmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStpmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasStpmvStridedBatchedFortran = & hipblasStpmvStridedBatched(handle, uplo, transA, diag, m, & AP, stride_AP, x, incx, stride_x, batch_count) end function hipblasStpmvStridedBatchedFortran function hipblasDtpmvStridedBatchedFortran(handle, uplo, transA, diag, m, & AP, stride_AP, x, incx, stride_x, batch_count) & bind(c, name='hipblasDtpmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtpmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasDtpmvStridedBatchedFortran = & hipblasDtpmvStridedBatched(handle, uplo, transA, diag, m, & AP, stride_AP, x, incx, stride_x, batch_count) end function hipblasDtpmvStridedBatchedFortran function hipblasCtpmvStridedBatchedFortran(handle, uplo, transA, diag, m, & AP, stride_AP, x, incx, stride_x, batch_count) & bind(c, name='hipblasCtpmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtpmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasCtpmvStridedBatchedFortran = & hipblasCtpmvStridedBatched(handle, uplo, transA, diag, m, & AP, stride_AP, x, incx, stride_x, batch_count) end function hipblasCtpmvStridedBatchedFortran function hipblasZtpmvStridedBatchedFortran(handle, uplo, transA, diag, m, & AP, stride_AP, x, incx, stride_x, batch_count) & bind(c, name='hipblasZtpmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtpmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasZtpmvStridedBatchedFortran = & hipblasZtpmvStridedBatched(handle, uplo, transA, diag, m, & AP, stride_AP, x, incx, stride_x, batch_count) end function 
hipblasZtpmvStridedBatchedFortran ! tbmv function hipblasStbmvFortran(handle, uplo, transA, diag, m, k, & A, lda, x, incx) & bind(c, name='hipblasStbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasStbmvFortran = & hipblasStbmv(handle, uplo, transA, diag, m, k, & A, lda, x, incx) end function hipblasStbmvFortran function hipblasDtbmvFortran(handle, uplo, transA, diag, m, k, & A, lda, x, incx) & bind(c, name='hipblasDtbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasDtbmvFortran = & hipblasDtbmv(handle, uplo, transA, diag, m, k, & A, lda, x, incx) end function hipblasDtbmvFortran function hipblasCtbmvFortran(handle, uplo, transA, diag, m, k, & A, lda, x, incx) & bind(c, name='hipblasCtbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasCtbmvFortran = & hipblasCtbmv(handle, uplo, transA, diag, m, k, & A, lda, x, incx) end function hipblasCtbmvFortran function hipblasZtbmvFortran(handle, uplo, transA, diag, m, k, & A, lda, x, incx) & bind(c, name='hipblasZtbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasZtbmvFortran = & hipblasZtbmv(handle, uplo, transA, diag, m, k, & A, lda, x, incx) end function hipblasZtbmvFortran ! 
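    ! Added note: the tbmv wrappers above and the batched/strided variants
    ! that follow forward to the banded triangular multiply, x := op(A)*x,
    ! where A is an m-by-m triangular band matrix with k super-diagonals
    ! (upper) or sub-diagonals (lower) stored in band format, so the
    ! leading dimension must satisfy lda >= k + 1.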
tbmvBatched function hipblasStbmvBatchedFortran(handle, uplo, transA, diag, m, k, & A, lda, x, incx, batch_count) & bind(c, name='hipblasStbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasStbmvBatchedFortran = & hipblasStbmvBatched(handle, uplo, transA, diag, m, k, & A, lda, x, incx, batch_count) end function hipblasStbmvBatchedFortran function hipblasDtbmvBatchedFortran(handle, uplo, transA, diag, m, k, & A, lda, x, incx, batch_count) & bind(c, name='hipblasDtbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasDtbmvBatchedFortran = & hipblasDtbmvBatched(handle, uplo, transA, diag, m, k, & A, lda, x, incx, batch_count) end function hipblasDtbmvBatchedFortran function hipblasCtbmvBatchedFortran(handle, uplo, transA, diag, m, k, & A, lda, x, incx, batch_count) & bind(c, name='hipblasCtbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasCtbmvBatchedFortran = & hipblasCtbmvBatched(handle, uplo, transA, diag, m, k, & A, lda, x, incx, batch_count) end function hipblasCtbmvBatchedFortran function hipblasZtbmvBatchedFortran(handle, uplo, transA, diag, m, k, & A, lda, x, incx, batch_count) & bind(c, name='hipblasZtbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasZtbmvBatchedFortran = & hipblasZtbmvBatched(handle, uplo, transA, diag, m, k, & A, lda, x, incx, batch_count) end function hipblasZtbmvBatchedFortran ! 
tbmvStridedBatched function hipblasStbmvStridedBatchedFortran(handle, uplo, transA, diag, m, k, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasStbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasStbmvStridedBatchedFortran = & hipblasStbmvStridedBatched(handle, uplo, transA, diag, m, k, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasStbmvStridedBatchedFortran function hipblasDtbmvStridedBatchedFortran(handle, uplo, transA, diag, m, k, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasDtbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasDtbmvStridedBatchedFortran = & hipblasDtbmvStridedBatched(handle, uplo, transA, diag, m, k, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasDtbmvStridedBatchedFortran function hipblasCtbmvStridedBatchedFortran(handle, uplo, transA, diag, m, k, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasCtbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasCtbmvStridedBatchedFortran = & hipblasCtbmvStridedBatched(handle, uplo, transA, diag, m, k, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasCtbmvStridedBatchedFortran function hipblasZtbmvStridedBatchedFortran(handle, uplo, transA, diag, m, k, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasZtbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x 
integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasZtbmvStridedBatchedFortran = & hipblasZtbmvStridedBatched(handle, uplo, transA, diag, m, k, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasZtbmvStridedBatchedFortran ! tbsv function hipblasStbsvFortran(handle, uplo, transA, diag, n, k, & A, lda, x, incx) & bind(c, name='hipblasStbsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStbsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasStbsvFortran = & hipblasStbsv(handle, uplo, transA, diag, n, k, & A, lda, x, incx) end function hipblasStbsvFortran function hipblasDtbsvFortran(handle, uplo, transA, diag, n, k, & A, lda, x, incx) & bind(c, name='hipblasDtbsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtbsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasDtbsvFortran = & hipblasDtbsv(handle, uplo, transA, diag, n, k, & A, lda, x, incx) end function hipblasDtbsvFortran function hipblasCtbsvFortran(handle, uplo, transA, diag, n, k, & A, lda, x, incx) & bind(c, name='hipblasCtbsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtbsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasCtbsvFortran = & hipblasCtbsv(handle, uplo, transA, diag, n, k, & A, lda, x, incx) end function hipblasCtbsvFortran function hipblasZtbsvFortran(handle, uplo, transA, diag, n, k, & A, lda, x, incx) & bind(c, name='hipblasZtbsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtbsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasZtbsvFortran = & hipblasZtbsv(handle, uplo, transA, diag, n, k, & A, lda, x, incx) end function hipblasZtbsvFortran ! 
tbsvBatched function hipblasStbsvBatchedFortran(handle, uplo, transA, diag, n, k, & A, lda, x, incx, batch_count) & bind(c, name='hipblasStbsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStbsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasStbsvBatchedFortran = & hipblasStbsvBatched(handle, uplo, transA, diag, n, k, & A, lda, x, incx, batch_count) end function hipblasStbsvBatchedFortran function hipblasDtbsvBatchedFortran(handle, uplo, transA, diag, n, k, & A, lda, x, incx, batch_count) & bind(c, name='hipblasDtbsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtbsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasDtbsvBatchedFortran = & hipblasDtbsvBatched(handle, uplo, transA, diag, n, k, & A, lda, x, incx, batch_count) end function hipblasDtbsvBatchedFortran function hipblasCtbsvBatchedFortran(handle, uplo, transA, diag, n, k, & A, lda, x, incx, batch_count) & bind(c, name='hipblasCtbsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtbsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasCtbsvBatchedFortran = & hipblasCtbsvBatched(handle, uplo, transA, diag, n, k, & A, lda, x, incx, batch_count) end function hipblasCtbsvBatchedFortran function hipblasZtbsvBatchedFortran(handle, uplo, transA, diag, n, k, & A, lda, x, incx, batch_count) & bind(c, name='hipblasZtbsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtbsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasZtbsvBatchedFortran = & hipblasZtbsvBatched(handle, uplo, transA, diag, n, k, & A, lda, x, incx, batch_count) end function hipblasZtbsvBatchedFortran ! 
tbsvStridedBatched function hipblasStbsvStridedBatchedFortran(handle, uplo, transA, diag, n, k, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasStbsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStbsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasStbsvStridedBatchedFortran = & hipblasStbsvStridedBatched(handle, uplo, transA, diag, n, k, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasStbsvStridedBatchedFortran function hipblasDtbsvStridedBatchedFortran(handle, uplo, transA, diag, n, k, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasDtbsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtbsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasDtbsvStridedBatchedFortran = & hipblasDtbsvStridedBatched(handle, uplo, transA, diag, n, k, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasDtbsvStridedBatchedFortran function hipblasCtbsvStridedBatchedFortran(handle, uplo, transA, diag, n, k, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasCtbsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtbsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasCtbsvStridedBatchedFortran = & hipblasCtbsvStridedBatched(handle, uplo, transA, diag, n, k, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasCtbsvStridedBatchedFortran function hipblasZtbsvStridedBatchedFortran(handle, uplo, transA, diag, n, k, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasZtbsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtbsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x 
integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasZtbsvStridedBatchedFortran = & hipblasZtbsvStridedBatched(handle, uplo, transA, diag, n, k, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasZtbsvStridedBatchedFortran ! tpsv function hipblasStpsvFortran(handle, uplo, transA, diag, n, & AP, x, incx) & bind(c, name='hipblasStpsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStpsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx hipblasStpsvFortran = & hipblasStpsv(handle, uplo, transA, diag, n, & AP, x, incx) end function hipblasStpsvFortran function hipblasDtpsvFortran(handle, uplo, transA, diag, n, & AP, x, incx) & bind(c, name='hipblasDtpsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtpsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx hipblasDtpsvFortran = & hipblasDtpsv(handle, uplo, transA, diag, n, & AP, x, incx) end function hipblasDtpsvFortran function hipblasCtpsvFortran(handle, uplo, transA, diag, n, & AP, x, incx) & bind(c, name='hipblasCtpsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtpsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx hipblasCtpsvFortran = & hipblasCtpsv(handle, uplo, transA, diag, n, & AP, x, incx) end function hipblasCtpsvFortran function hipblasZtpsvFortran(handle, uplo, transA, diag, n, & AP, x, incx) & bind(c, name='hipblasZtpsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtpsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx hipblasZtpsvFortran = & hipblasZtpsv(handle, uplo, transA, diag, n, & AP, x, incx) end function hipblasZtpsvFortran ! 
tpsvBatched function hipblasStpsvBatchedFortran(handle, uplo, transA, diag, n, & AP, x, incx, batch_count) & bind(c, name='hipblasStpsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStpsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasStpsvBatchedFortran = & hipblasStpsvBatched(handle, uplo, transA, diag, n, & AP, x, incx, batch_count) end function hipblasStpsvBatchedFortran function hipblasDtpsvBatchedFortran(handle, uplo, transA, diag, n, & AP, x, incx, batch_count) & bind(c, name='hipblasDtpsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtpsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasDtpsvBatchedFortran = & hipblasDtpsvBatched(handle, uplo, transA, diag, n, & AP, x, incx, batch_count) end function hipblasDtpsvBatchedFortran function hipblasCtpsvBatchedFortran(handle, uplo, transA, diag, n, & AP, x, incx, batch_count) & bind(c, name='hipblasCtpsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtpsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasCtpsvBatchedFortran = & hipblasCtpsvBatched(handle, uplo, transA, diag, n, & AP, x, incx, batch_count) end function hipblasCtpsvBatchedFortran function hipblasZtpsvBatchedFortran(handle, uplo, transA, diag, n, & AP, x, incx, batch_count) & bind(c, name='hipblasZtpsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtpsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasZtpsvBatchedFortran = & hipblasZtpsvBatched(handle, uplo, transA, diag, n, & AP, x, incx, batch_count) end function hipblasZtpsvBatchedFortran ! 
tpsvStridedBatched function hipblasStpsvStridedBatchedFortran(handle, uplo, transA, diag, n, & AP, stride_AP, x, incx, stride_x, batch_count) & bind(c, name='hipblasStpsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStpsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasStpsvStridedBatchedFortran = & hipblasStpsvStridedBatched(handle, uplo, transA, diag, n, & AP, stride_AP, x, incx, stride_x, batch_count) end function hipblasStpsvStridedBatchedFortran function hipblasDtpsvStridedBatchedFortran(handle, uplo, transA, diag, n, & AP, stride_AP, x, incx, stride_x, batch_count) & bind(c, name='hipblasDtpsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtpsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasDtpsvStridedBatchedFortran = & hipblasDtpsvStridedBatched(handle, uplo, transA, diag, n, & AP, stride_AP, x, incx, stride_x, batch_count) end function hipblasDtpsvStridedBatchedFortran function hipblasCtpsvStridedBatchedFortran(handle, uplo, transA, diag, n, & AP, stride_AP, x, incx, stride_x, batch_count) & bind(c, name='hipblasCtpsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtpsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasCtpsvStridedBatchedFortran = & hipblasCtpsvStridedBatched(handle, uplo, transA, diag, n, & AP, stride_AP, x, incx, stride_x, batch_count) end function hipblasCtpsvStridedBatchedFortran function hipblasZtpsvStridedBatchedFortran(handle, uplo, transA, diag, n, & AP, stride_AP, x, incx, stride_x, batch_count) & bind(c, name='hipblasZtpsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtpsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasZtpsvStridedBatchedFortran = & hipblasZtpsvStridedBatched(handle, uplo, transA, diag, n, & AP, stride_AP, x, incx, stride_x, batch_count) end function 
hipblasZtpsvStridedBatchedFortran ! symv function hipblasSsymvFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasSsymvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsymvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasSsymvFortran = & hipblasSsymv(handle, uplo, n, alpha, & A, lda, x, incx, beta, y, incy) end function hipblasSsymvFortran function hipblasDsymvFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasDsymvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsymvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasDsymvFortran = & hipblasDsymv(handle, uplo, n, alpha, & A, lda, x, incx, beta, y, incy) end function hipblasDsymvFortran function hipblasCsymvFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasCsymvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsymvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasCsymvFortran = & hipblasCsymv(handle, uplo, n, alpha, & A, lda, x, incx, beta, y, incy) end function hipblasCsymvFortran function hipblasZsymvFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasZsymvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsymvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasZsymvFortran = & hipblasZsymv(handle, uplo, n, alpha, & A, lda, x, incx, beta, y, incy) end function hipblasZsymvFortran ! 
symvBatched function hipblasSsymvBatchedFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasSsymvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsymvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasSsymvBatchedFortran = & hipblasSsymvBatched(handle, uplo, n, alpha, & A, lda, x, incx, beta, y, incy, batch_count) end function hipblasSsymvBatchedFortran function hipblasDsymvBatchedFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasDsymvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsymvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasDsymvBatchedFortran = & hipblasDsymvBatched(handle, uplo, n, alpha, & A, lda, x, incx, beta, y, incy, batch_count) end function hipblasDsymvBatchedFortran function hipblasCsymvBatchedFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasCsymvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsymvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasCsymvBatchedFortran = & hipblasCsymvBatched(handle, uplo, n, alpha, & A, lda, x, incx, beta, y, incy, batch_count) end function hipblasCsymvBatchedFortran function hipblasZsymvBatchedFortran(handle, uplo, n, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasZsymvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsymvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasZsymvBatchedFortran = & hipblasZsymvBatched(handle, uplo, n, alpha, & A, lda, x, incx, beta, y, incy, batch_count) end function hipblasZsymvBatchedFortran ! 
symvStridedBatched function hipblasSsymvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasSsymvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsymvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasSsymvStridedBatchedFortran = & hipblasSsymvStridedBatched(handle, uplo, n, alpha, & A, lda, stride_A, x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasSsymvStridedBatchedFortran function hipblasDsymvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasDsymvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsymvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasDsymvStridedBatchedFortran = & hipblasDsymvStridedBatched(handle, uplo, n, alpha, & A, lda, stride_A, x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasDsymvStridedBatchedFortran function hipblasCsymvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasCsymvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsymvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasCsymvStridedBatchedFortran = & hipblasCsymvStridedBatched(handle, uplo, n, alpha, & A, lda, stride_A, x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasCsymvStridedBatchedFortran function hipblasZsymvStridedBatchedFortran(handle, uplo, n, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasZsymvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsymvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value 
:: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasZsymvStridedBatchedFortran = & hipblasZsymvStridedBatched(handle, uplo, n, alpha, & A, lda, stride_A, x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasZsymvStridedBatchedFortran ! spmv function hipblasSspmvFortran(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy) & bind(c, name='hipblasSspmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSspmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasSspmvFortran = & hipblasSspmv(handle, uplo, n, alpha, & AP, x, incx, beta, y, incy) end function hipblasSspmvFortran function hipblasDspmvFortran(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy) & bind(c, name='hipblasDspmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDspmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasDspmvFortran = & hipblasDspmv(handle, uplo, n, alpha, & AP, x, incx, beta, y, incy) end function hipblasDspmvFortran ! spmvBatched function hipblasSspmvBatchedFortran(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasSspmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSspmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasSspmvBatchedFortran = & hipblasSspmvBatched(handle, uplo, n, alpha, & AP, x, incx, beta, y, incy, batch_count) end function hipblasSspmvBatchedFortran function hipblasDspmvBatchedFortran(handle, uplo, n, alpha, AP, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasDspmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDspmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasDspmvBatchedFortran = & hipblasDspmvBatched(handle, uplo, n, alpha, & AP, x, incx, beta, y, incy, batch_count) end function hipblasDspmvBatchedFortran ! 
spmvStridedBatched function hipblasSspmvStridedBatchedFortran(handle, uplo, n, alpha, AP, stride_AP, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasSspmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSspmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasSspmvStridedBatchedFortran = & hipblasSspmvStridedBatched(handle, uplo, n, alpha, & AP, stride_AP, x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasSspmvStridedBatchedFortran function hipblasDspmvStridedBatchedFortran(handle, uplo, n, alpha, AP, stride_AP, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasDspmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDspmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasDspmvStridedBatchedFortran = & hipblasDspmvStridedBatched(handle, uplo, n, alpha, & AP, stride_AP, x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasDspmvStridedBatchedFortran ! sbmv function hipblasSsbmvFortran(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasSsbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasSsbmvFortran = & hipblasSsbmv(handle, uplo, n, k, alpha, & A, lda, x, incx, beta, y, incy) end function hipblasSsbmvFortran function hipblasDsbmvFortran(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy) & bind(c, name='hipblasDsbmvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsbmvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy hipblasDsbmvFortran = & hipblasDsbmv(handle, uplo, n, k, alpha, & A, lda, x, incx, beta, y, incy) end function hipblasDsbmvFortran ! 
sbmvBatched function hipblasSsbmvBatchedFortran(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasSsbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasSsbmvBatchedFortran = & hipblasSsbmvBatched(handle, uplo, n, k, alpha, & A, lda, x, incx, beta, y, incy, batch_count) end function hipblasSsbmvBatchedFortran function hipblasDsbmvBatchedFortran(handle, uplo, n, k, alpha, A, lda, & x, incx, beta, y, incy, batch_count) & bind(c, name='hipblasDsbmvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsbmvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int), value :: batch_count hipblasDsbmvBatchedFortran = & hipblasDsbmvBatched(handle, uplo, n, k, alpha, & A, lda, x, incx, beta, y, incy, batch_count) end function hipblasDsbmvBatchedFortran ! sbmvStridedBatched function hipblasSsbmvStridedBatchedFortran(handle, uplo, n, k, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasSsbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: batch_count hipblasSsbmvStridedBatchedFortran = & hipblasSsbmvStridedBatched(handle, uplo, n, k, alpha, & A, lda, stride_A, x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasSsbmvStridedBatchedFortran function hipblasDsbmvStridedBatchedFortran(handle, uplo, n, k, alpha, A, lda, stride_A, & x, incx, stride_x, beta, y, incy, stride_y, batch_count) & bind(c, name='hipblasDsbmvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsbmvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: beta type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y integer(c_int), value :: 
batch_count hipblasDsbmvStridedBatchedFortran = & hipblasDsbmvStridedBatched(handle, uplo, n, k, alpha, & A, lda, stride_A, x, incx, stride_x, beta, y, incy, stride_y, batch_count) end function hipblasDsbmvStridedBatchedFortran ! ger function hipblasSgerFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasSgerFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgerFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasSgerFortran = & hipblasSger(handle, m, n, alpha, & x, incx, y, incy, A, lda) end function hipblasSgerFortran function hipblasDgerFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasDgerFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgerFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasDgerFortran = & hipblasDger(handle, m, n, alpha, & x, incx, y, incy, A, lda) end function hipblasDgerFortran function hipblasCgeruFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasCgeruFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeruFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasCgeruFortran = & hipblasCgeru(handle, m, n, alpha, & x, incx, y, incy, A, lda) end function hipblasCgeruFortran function hipblasCgercFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasCgercFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgercFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasCgercFortran = & hipblasCgerc(handle, m, n, alpha, & x, incx, y, incy, A, lda) end function hipblasCgercFortran function hipblasZgeruFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasZgeruFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeruFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasZgeruFortran = & hipblasZgeru(handle, m, n, alpha, & x, incx, y, incy, A, lda) end function hipblasZgeruFortran function hipblasZgercFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasZgercFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgercFortran type(c_ptr), 
value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasZgercFortran = & hipblasZgerc(handle, m, n, alpha, & x, incx, y, incy, A, lda) end function hipblasZgercFortran ! gerBatched function hipblasSgerBatchedFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasSgerBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgerBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasSgerBatchedFortran = & hipblasSgerBatched(handle, m, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasSgerBatchedFortran function hipblasDgerBatchedFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasDgerBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgerBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasDgerBatchedFortran = & hipblasDgerBatched(handle, m, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasDgerBatchedFortran function hipblasCgeruBatchedFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasCgeruBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeruBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasCgeruBatchedFortran = & hipblasCgeruBatched(handle, m, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasCgeruBatchedFortran function hipblasCgercBatchedFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasCgercBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgercBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasCgercBatchedFortran = & hipblasCgercBatched(handle, m, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasCgercBatchedFortran function hipblasZgeruBatchedFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasZgeruBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeruBatchedFortran type(c_ptr), value :: 
handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasZgeruBatchedFortran = & hipblasZgeruBatched(handle, m, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasZgeruBatchedFortran function hipblasZgercBatchedFortran(handle, m, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasZgercBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgercBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasZgercBatchedFortran = & hipblasZgercBatched(handle, m, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasZgercBatchedFortran ! gerStridedBatched function hipblasSgerStridedBatchedFortran(handle, m, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasSgerStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgerStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasSgerStridedBatchedFortran = & hipblasSgerStridedBatched(handle, m, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasSgerStridedBatchedFortran function hipblasDgerStridedBatchedFortran(handle, m, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasDgerStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgerStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasDgerStridedBatchedFortran = & hipblasDgerStridedBatched(handle, m, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasDgerStridedBatchedFortran function hipblasCgeruStridedBatchedFortran(handle, m, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasCgeruStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeruStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: 
stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasCgeruStridedBatchedFortran = & hipblasCgeruStridedBatched(handle, m, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasCgeruStridedBatchedFortran function hipblasCgercStridedBatchedFortran(handle, m, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasCgercStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgercStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasCgercStridedBatchedFortran = & hipblasCgercStridedBatched(handle, m, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasCgercStridedBatchedFortran function hipblasZgeruStridedBatchedFortran(handle, m, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasZgeruStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeruStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasZgeruStridedBatchedFortran = & hipblasZgeruStridedBatched(handle, m, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasZgeruStridedBatchedFortran function hipblasZgercStridedBatchedFortran(handle, m, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasZgercStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgercStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasZgercStridedBatchedFortran = & hipblasZgercStridedBatched(handle, m, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasZgercStridedBatchedFortran ! 
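    ! ------------------------------------------------------------------
    ! Note on the *StridedBatched wrappers (illustrative): each stride is
    ! declared integer(c_int64_t), matching the 64-bit stride type of the
    ! C API, while batch_count remains a plain integer(c_int). A
    ! hypothetical call that applies batch_count rank-1 updates stored
    ! contiguously (strides chosen as the size of one problem instance)
    ! might look like:
    !
    !     integer(c_int64_t) :: stride_x, stride_y, stride_A
    !     stride_x = int(m, c_int64_t)                       ! x is an m-vector
    !     stride_y = int(n, c_int64_t)                       ! y is an n-vector
    !     stride_A = int(lda, c_int64_t) * int(n, c_int64_t) ! one m-by-n matrix
    !     status = hipblasSgerStridedBatchedFortran(handle, m, n, d_alpha, &
    !                  dx, 1_c_int, stride_x, dy, 1_c_int, stride_y, &
    !                  dA, lda, stride_A, batch_count)
    !
    ! All names other than the wrapper itself are placeholders.
    ! ------------------------------------------------------------------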
spr function hipblasSsprFortran(handle, uplo, n, alpha, x, incx, AP) & bind(c, name='hipblasSsprFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsprFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP hipblasSsprFortran = & hipblasSspr(handle, uplo, n, alpha, & x, incx, AP) end function hipblasSsprFortran function hipblasDsprFortran(handle, uplo, n, alpha, x, incx, AP) & bind(c, name='hipblasDsprFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsprFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP hipblasDsprFortran = & hipblasDspr(handle, uplo, n, alpha, & x, incx, AP) end function hipblasDsprFortran function hipblasCsprFortran(handle, uplo, n, alpha, x, incx, AP) & bind(c, name='hipblasCsprFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsprFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP hipblasCsprFortran = & hipblasCspr(handle, uplo, n, alpha, & x, incx, AP) end function hipblasCsprFortran function hipblasZsprFortran(handle, uplo, n, alpha, x, incx, AP) & bind(c, name='hipblasZsprFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsprFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP hipblasZsprFortran = & hipblasZspr(handle, uplo, n, alpha, & x, incx, AP) end function hipblasZsprFortran ! 
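    ! ------------------------------------------------------------------
    ! Storage note (illustrative only): the spr wrappers above take a
    ! packed matrix AP instead of A and lda; AP holds the n*(n+1)/2
    ! elements of the referenced triangle in column-major packed order.
    ! A hypothetical host-side helper that packs the upper triangle of a
    ! column-major matrix before copying it to the device could be:
    !
    !     k = 0
    !     do j = 1, n
    !         do i = 1, j
    !             k = k + 1
    !             AP_host(k) = A_host(i, j)
    !         end do
    !     end do
    !
    ! A_host, AP_host, i, j, k and n are placeholder names; the loop is
    ! not part of this module.
    ! ------------------------------------------------------------------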
sprBatched function hipblasSsprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batch_count) & bind(c, name='hipblasSsprBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsprBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasSsprBatchedFortran = & hipblasSsprBatched(handle, uplo, n, alpha, & x, incx, AP, batch_count) end function hipblasSsprBatchedFortran function hipblasDsprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batch_count) & bind(c, name='hipblasDsprBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsprBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasDsprBatchedFortran = & hipblasDsprBatched(handle, uplo, n, alpha, & x, incx, AP, batch_count) end function hipblasDsprBatchedFortran function hipblasCsprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batch_count) & bind(c, name='hipblasCsprBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsprBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasCsprBatchedFortran = & hipblasCsprBatched(handle, uplo, n, alpha, & x, incx, AP, batch_count) end function hipblasCsprBatchedFortran function hipblasZsprBatchedFortran(handle, uplo, n, alpha, x, incx, AP, batch_count) & bind(c, name='hipblasZsprBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsprBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasZsprBatchedFortran = & hipblasZsprBatched(handle, uplo, n, alpha, & x, incx, AP, batch_count) end function hipblasZsprBatchedFortran ! 
sprStridedBatched function hipblasSsprStridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & AP, stride_AP, batch_count) & bind(c, name='hipblasSsprStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsprStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP integer(c_int), value :: batch_count hipblasSsprStridedBatchedFortran = & hipblasSsprStridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, AP, stride_AP, batch_count) end function hipblasSsprStridedBatchedFortran function hipblasDsprStridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & AP, stride_AP, batch_count) & bind(c, name='hipblasDsprStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsprStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP integer(c_int), value :: batch_count hipblasDsprStridedBatchedFortran = & hipblasDsprStridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, AP, stride_AP, batch_count) end function hipblasDsprStridedBatchedFortran function hipblasCsprStridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & AP, stride_AP, batch_count) & bind(c, name='hipblasCsprStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsprStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP integer(c_int), value :: batch_count hipblasCsprStridedBatchedFortran = & hipblasCsprStridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, AP, stride_AP, batch_count) end function hipblasCsprStridedBatchedFortran function hipblasZsprStridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & AP, stride_AP, batch_count) & bind(c, name='hipblasZsprStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsprStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: AP integer(c_int64_t), value :: stride_AP integer(c_int), value :: batch_count hipblasZsprStridedBatchedFortran = & hipblasZsprStridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, AP, stride_AP, batch_count) end function hipblasZsprStridedBatchedFortran ! 
spr2 function hipblasSspr2Fortran(handle, uplo, n, alpha, x, incx, & y, incy, AP) & bind(c, name='hipblasSspr2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSspr2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: AP hipblasSspr2Fortran = & hipblasSspr2(handle, uplo, n, alpha, & x, incx, y, incy, AP) end function hipblasSspr2Fortran function hipblasDspr2Fortran(handle, uplo, n, alpha, x, incx, & y, incy, AP) & bind(c, name='hipblasDspr2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDspr2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: AP hipblasDspr2Fortran = & hipblasDspr2(handle, uplo, n, alpha, & x, incx, y, incy, AP) end function hipblasDspr2Fortran ! spr2Batched function hipblasSspr2BatchedFortran(handle, uplo, n, alpha, x, incx, & y, incy, AP, batch_count) & bind(c, name='hipblasSspr2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSspr2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasSspr2BatchedFortran = & hipblasSspr2Batched(handle, uplo, n, alpha, & x, incx, y, incy, AP, batch_count) end function hipblasSspr2BatchedFortran function hipblasDspr2BatchedFortran(handle, uplo, n, alpha, x, incx, & y, incy, AP, batch_count) & bind(c, name='hipblasDspr2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDspr2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: AP integer(c_int), value :: batch_count hipblasDspr2BatchedFortran = & hipblasDspr2Batched(handle, uplo, n, alpha, & x, incx, y, incy, AP, batch_count) end function hipblasDspr2BatchedFortran ! 
spr2StridedBatched
    function hipblasSspr2StridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, &
                                               y, incy, stride_y, AP, stride_AP, batch_count) &
        bind(c, name='hipblasSspr2StridedBatchedFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSspr2StridedBatchedFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(c_int), value :: n
        type(c_ptr), value :: alpha
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        integer(c_int64_t), value :: stride_x
        type(c_ptr), value :: y
        integer(c_int), value :: incy
        integer(c_int64_t), value :: stride_y
        type(c_ptr), value :: AP
        integer(c_int64_t), value :: stride_AP
        integer(c_int), value :: batch_count
        hipblasSspr2StridedBatchedFortran = &
            hipblasSspr2StridedBatched(handle, uplo, n, alpha, &
                                       x, incx, stride_x, y, incy, stride_y, AP, stride_AP, batch_count)
    end function hipblasSspr2StridedBatchedFortran

    function hipblasDspr2StridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, &
                                               y, incy, stride_y, AP, stride_AP, batch_count) &
        bind(c, name='hipblasDspr2StridedBatchedFortran')
        use iso_c_binding
        use hipblas_enums
        implicit none
        integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDspr2StridedBatchedFortran
        type(c_ptr), value :: handle
        integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo
        integer(c_int), value :: n
        type(c_ptr), value :: alpha
        type(c_ptr), value :: x
        integer(c_int), value :: incx
        integer(c_int64_t), value :: stride_x
        type(c_ptr), value :: y
        integer(c_int), value :: incy
        integer(c_int64_t), value :: stride_y
        type(c_ptr), value :: AP
        integer(c_int64_t), value :: stride_AP
        integer(c_int), value :: batch_count
        hipblasDspr2StridedBatchedFortran = &
            hipblasDspr2StridedBatched(handle, uplo, n, alpha, &
                                       x, incx, stride_x, y, incy, stride_y, AP, stride_AP, batch_count)
    end function hipblasDspr2StridedBatchedFortran

    !
syr function hipblasSsyrFortran(handle, uplo, n, alpha, x, incx, A, lda) & bind(c, name='hipblasSsyrFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda hipblasSsyrFortran = & hipblasSsyr(handle, uplo, n, alpha, & x, incx, A, lda) end function hipblasSsyrFortran function hipblasDsyrFortran(handle, uplo, n, alpha, x, incx, A, lda) & bind(c, name='hipblasDsyrFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda hipblasDsyrFortran = & hipblasDsyr(handle, uplo, n, alpha, & x, incx, A, lda) end function hipblasDsyrFortran function hipblasCsyrFortran(handle, uplo, n, alpha, x, incx, A, lda) & bind(c, name='hipblasCsyrFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda hipblasCsyrFortran = & hipblasCsyr(handle, uplo, n, alpha, & x, incx, A, lda) end function hipblasCsyrFortran function hipblasZsyrFortran(handle, uplo, n, alpha, x, incx, A, lda) & bind(c, name='hipblasZsyrFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda hipblasZsyrFortran = & hipblasZsyr(handle, uplo, n, alpha, & x, incx, A, lda) end function hipblasZsyrFortran ! 
syrBatched function hipblasSsyrBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batch_count) & bind(c, name='hipblasSsyrBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasSsyrBatchedFortran = & hipblasSsyrBatched(handle, uplo, n, alpha, & x, incx, A, lda, batch_count) end function hipblasSsyrBatchedFortran function hipblasDsyrBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batch_count) & bind(c, name='hipblasDsyrBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasDsyrBatchedFortran = & hipblasDsyrBatched(handle, uplo, n, alpha, & x, incx, A, lda, batch_count) end function hipblasDsyrBatchedFortran function hipblasCsyrBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batch_count) & bind(c, name='hipblasCsyrBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasCsyrBatchedFortran = & hipblasCsyrBatched(handle, uplo, n, alpha, & x, incx, A, lda, batch_count) end function hipblasCsyrBatchedFortran function hipblasZsyrBatchedFortran(handle, uplo, n, alpha, x, incx, A, lda, batch_count) & bind(c, name='hipblasZsyrBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasZsyrBatchedFortran = & hipblasZsyrBatched(handle, uplo, n, alpha, & x, incx, A, lda, batch_count) end function hipblasZsyrBatchedFortran ! 
syrStridedBatched function hipblasSsyrStridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & A, lda, stride_A, batch_count) & bind(c, name='hipblasSsyrStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasSsyrStridedBatchedFortran = & hipblasSsyrStridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, A, lda, stride_A, batch_count) end function hipblasSsyrStridedBatchedFortran function hipblasDsyrStridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & A, lda, stride_A, batch_count) & bind(c, name='hipblasDsyrStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasDsyrStridedBatchedFortran = & hipblasDsyrStridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, A, lda, stride_A, batch_count) end function hipblasDsyrStridedBatchedFortran function hipblasCsyrStridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & A, lda, stride_A, batch_count) & bind(c, name='hipblasCsyrStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasCsyrStridedBatchedFortran = & hipblasCsyrStridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, A, lda, stride_A, batch_count) end function hipblasCsyrStridedBatchedFortran function hipblasZsyrStridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & A, lda, stride_A, batch_count) & bind(c, name='hipblasZsyrStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasZsyrStridedBatchedFortran = & hipblasZsyrStridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, A, lda, stride_A, batch_count) end function hipblasZsyrStridedBatchedFortran ! 
syr2 function hipblasSsyr2Fortran(handle, uplo, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasSsyr2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyr2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasSsyr2Fortran = & hipblasSsyr2(handle, uplo, n, alpha, & x, incx, y, incy, A, lda) end function hipblasSsyr2Fortran function hipblasDsyr2Fortran(handle, uplo, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasDsyr2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyr2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasDsyr2Fortran = & hipblasDsyr2(handle, uplo, n, alpha, & x, incx, y, incy, A, lda) end function hipblasDsyr2Fortran function hipblasCsyr2Fortran(handle, uplo, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasCsyr2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyr2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasCsyr2Fortran = & hipblasCsyr2(handle, uplo, n, alpha, & x, incx, y, incy, A, lda) end function hipblasCsyr2Fortran function hipblasZsyr2Fortran(handle, uplo, n, alpha, x, incx, & y, incy, A, lda) & bind(c, name='hipblasZsyr2Fortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyr2Fortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda hipblasZsyr2Fortran = & hipblasZsyr2(handle, uplo, n, alpha, & x, incx, y, incy, A, lda) end function hipblasZsyr2Fortran ! 
syr2Batched function hipblasSsyr2BatchedFortran(handle, uplo, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasSsyr2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyr2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasSsyr2BatchedFortran = & hipblasSsyr2Batched(handle, uplo, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasSsyr2BatchedFortran function hipblasDsyr2BatchedFortran(handle, uplo, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasDsyr2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyr2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasDsyr2BatchedFortran = & hipblasDsyr2Batched(handle, uplo, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasDsyr2BatchedFortran function hipblasCsyr2BatchedFortran(handle, uplo, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasCsyr2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyr2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasCsyr2BatchedFortran = & hipblasCsyr2Batched(handle, uplo, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasCsyr2BatchedFortran function hipblasZsyr2BatchedFortran(handle, uplo, n, alpha, x, incx, & y, incy, A, lda, batch_count) & bind(c, name='hipblasZsyr2BatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyr2BatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: y integer(c_int), value :: incy type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: batch_count hipblasZsyr2BatchedFortran = & hipblasZsyr2Batched(handle, uplo, n, alpha, & x, incx, y, incy, A, lda, batch_count) end function hipblasZsyr2BatchedFortran ! 
syr2StridedBatched function hipblasSsyr2StridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasSsyr2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyr2StridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasSsyr2StridedBatchedFortran = & hipblasSsyr2StridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasSsyr2StridedBatchedFortran function hipblasDsyr2StridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasDsyr2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyr2StridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasDsyr2StridedBatchedFortran = & hipblasDsyr2StridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasDsyr2StridedBatchedFortran function hipblasCsyr2StridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasCsyr2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyr2StridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasCsyr2StridedBatchedFortran = & hipblasCsyr2StridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasCsyr2StridedBatchedFortran function hipblasZsyr2StridedBatchedFortran(handle, uplo, n, alpha, x, incx, stride_x, & y, incy, stride_y, A, lda, stride_A, batch_count) & bind(c, name='hipblasZsyr2StridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyr2StridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: y integer(c_int), value :: incy integer(c_int64_t), 
value :: stride_y type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A integer(c_int), value :: batch_count hipblasZsyr2StridedBatchedFortran = & hipblasZsyr2StridedBatched(handle, uplo, n, alpha, & x, incx, stride_x, y, incy, stride_y, A, lda, stride_A, batch_count) end function hipblasZsyr2StridedBatchedFortran ! trsv function hipblasStrsvFortran(handle, uplo, transA, diag, m, & A, lda, x, incx) & bind(c, name='hipblasStrsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasStrsvFortran = & hipblasStrsv(handle, uplo, transA, diag, m, & A, lda, x, incx) end function hipblasStrsvFortran function hipblasDtrsvFortran(handle, uplo, transA, diag, m, & A, lda, x, incx) & bind(c, name='hipblasDtrsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasDtrsvFortran = & hipblasDtrsv(handle, uplo, transA, diag, m, & A, lda, x, incx) end function hipblasDtrsvFortran function hipblasCtrsvFortran(handle, uplo, transA, diag, m, & A, lda, x, incx) & bind(c, name='hipblasCtrsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasCtrsvFortran = & hipblasCtrsv(handle, uplo, transA, diag, m, & A, lda, x, incx) end function hipblasCtrsvFortran function hipblasZtrsvFortran(handle, uplo, transA, diag, m, & A, lda, x, incx) & bind(c, name='hipblasZtrsvFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrsvFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx hipblasZtrsvFortran = & hipblasZtrsv(handle, uplo, transA, diag, m, & A, lda, x, incx) end function hipblasZtrsvFortran ! 
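    ! The trsv wrappers above solve the triangular system op(A)*x = b, where
    ! op(A) is A, A**T or A**H as selected by transA and A is referenced
    ! through its uplo triangle; the right-hand side is supplied in x and is
    ! overwritten in place with the solution.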
trsvBatched function hipblasStrsvBatchedFortran(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) & bind(c, name='hipblasStrsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasStrsvBatchedFortran = & hipblasStrsvBatched(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) end function hipblasStrsvBatchedFortran function hipblasDtrsvBatchedFortran(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) & bind(c, name='hipblasDtrsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasDtrsvBatchedFortran = & hipblasDtrsvBatched(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) end function hipblasDtrsvBatchedFortran function hipblasCtrsvBatchedFortran(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) & bind(c, name='hipblasCtrsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasCtrsvBatchedFortran = & hipblasCtrsvBatched(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) end function hipblasCtrsvBatchedFortran function hipblasZtrsvBatchedFortran(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) & bind(c, name='hipblasZtrsvBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrsvBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int), value :: batch_count hipblasZtrsvBatchedFortran = & hipblasZtrsvBatched(handle, uplo, transA, diag, m, & A, lda, x, incx, batch_count) end function hipblasZtrsvBatchedFortran ! 
trsvStridedBatched function hipblasStrsvStridedBatchedFortran(handle, uplo, transA, diag, m, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasStrsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasStrsvStridedBatchedFortran = & hipblasStrsvStridedBatched(handle, uplo, transA, diag, m, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasStrsvStridedBatchedFortran function hipblasDtrsvStridedBatchedFortran(handle, uplo, transA, diag, m, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasDtrsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasDtrsvStridedBatchedFortran = & hipblasDtrsvStridedBatched(handle, uplo, transA, diag, m, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasDtrsvStridedBatchedFortran function hipblasCtrsvStridedBatchedFortran(handle, uplo, transA, diag, m, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasCtrsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasCtrsvStridedBatchedFortran = & hipblasCtrsvStridedBatched(handle, uplo, transA, diag, m, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasCtrsvStridedBatchedFortran function hipblasZtrsvStridedBatchedFortran(handle, uplo, transA, diag, m, & A, lda, stride_A, x, incx, stride_x, batch_count) & bind(c, name='hipblasZtrsvStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrsvStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_NON_UNIT)), value :: diag integer(c_int), value :: m type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x integer(c_int), value :: batch_count hipblasZtrsvStridedBatchedFortran 
= & hipblasZtrsvStridedBatched(handle, uplo, transA, diag, m, & A, lda, stride_A, x, incx, stride_x, batch_count) end function hipblasZtrsvStridedBatchedFortran !--------! ! blas 3 ! !--------! ! hemm function hipblasChemmFortran(handle, side, uplo, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasChemmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChemmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasChemmFortran = & hipblasChemm(handle, side, uplo, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasChemmFortran function hipblasZhemmFortran(handle, side, uplo, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasZhemmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhemmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZhemmFortran = & hipblasZhemm(handle, side, uplo, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasZhemmFortran ! hemmBatched function hipblasChemmBatchedFortran(handle, side, uplo, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasChemmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChemmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasChemmBatchedFortran = & hipblasChemmBatched(handle, side, uplo, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasChemmBatchedFortran function hipblasZhemmBatchedFortran(handle, side, uplo, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasZhemmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhemmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZhemmBatchedFortran = & hipblasZhemmBatched(handle, side, uplo, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasZhemmBatchedFortran ! 
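    ! The hemm wrappers above compute C = alpha*A*B + beta*C (side = left) or
    ! C = alpha*B*A + beta*C (side = right), where A is a Hermitian matrix
    ! referenced through its uplo triangle; only the complex precisions
    ! (C and Z) exist for this operation, as in the underlying BLAS routine.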
hemmStridedBatched function hipblasChemmStridedBatchedFortran(handle, side, uplo, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasChemmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasChemmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasChemmStridedBatchedFortran = & hipblasChemmStridedBatched(handle, side, uplo, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasChemmStridedBatchedFortran function hipblasZhemmStridedBatchedFortran(handle, side, uplo, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZhemmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZhemmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZhemmStridedBatchedFortran = & hipblasZhemmStridedBatched(handle, side, uplo, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasZhemmStridedBatchedFortran ! 
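    ! herk below performs the Hermitian rank-k update
    ! C = alpha*op(A)*op(A)**H + beta*C, where C is Hermitian and only its
    ! uplo triangle is referenced and updated; alpha and beta are real
    ! scalars for this operation.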
herk function hipblasCherkFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) & bind(c, name='hipblasCherkFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherkFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCherkFortran = & hipblasCherk(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) end function hipblasCherkFortran function hipblasZherkFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) & bind(c, name='hipblasZherkFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherkFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZherkFortran = & hipblasZherk(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) end function hipblasZherkFortran ! herkBatched function hipblasCherkBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) & bind(c, name='hipblasCherkBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherkBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCherkBatchedFortran = & hipblasCherkBatched(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) end function hipblasCherkBatchedFortran function hipblasZherkBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) & bind(c, name='hipblasZherkBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherkBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZherkBatchedFortran = & hipblasZherkBatched(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) end function hipblasZherkBatchedFortran ! 
herkStridedBatched function hipblasCherkStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCherkStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherkStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCherkStridedBatchedFortran = & hipblasCherkStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) end function hipblasCherkStridedBatchedFortran function hipblasZherkStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZherkStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherkStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZherkStridedBatchedFortran = & hipblasZherkStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) end function hipblasZherkStridedBatchedFortran ! her2k function hipblasCher2kFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasCher2kFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCher2kFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCher2kFortran = & hipblasCher2k(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasCher2kFortran function hipblasZher2kFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasZher2kFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZher2kFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZher2kFortran = & hipblasZher2k(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasZher2kFortran ! 
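    ! The her2k wrappers above, and their batched forms below, perform the
    ! Hermitian rank-2k update
    ! C = alpha*op(A)*op(B)**H + conjg(alpha)*op(B)*op(A)**H + beta*C,
    ! with beta real and only the uplo triangle of C updated.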
her2kBatched function hipblasCher2kBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasCher2kBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCher2kBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCher2kBatchedFortran = & hipblasCher2kBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasCher2kBatchedFortran function hipblasZher2kBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasZher2kBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZher2kBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZher2kBatchedFortran = & hipblasZher2kBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasZher2kBatchedFortran ! 
her2kStridedBatched function hipblasCher2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCher2kStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCher2kStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCher2kStridedBatchedFortran = & hipblasCher2kStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasCher2kStridedBatchedFortran function hipblasZher2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZher2kStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZher2kStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZher2kStridedBatchedFortran = & hipblasZher2kStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasZher2kStridedBatchedFortran ! 
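    ! herkx below computes the single product C = alpha*op(A)*op(B)**H + beta*C
    ! while still treating C as Hermitian, so only the uplo triangle of C is
    ! referenced and updated; it is an extension beyond reference BLAS,
    ! mirroring the rocBLAS/cuBLAS herkx routines.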
herkx function hipblasCherkxFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasCherkxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherkxFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCherkxFortran = & hipblasCherkx(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasCherkxFortran function hipblasZherkxFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasZherkxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherkxFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZherkxFortran = & hipblasZherkx(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasZherkxFortran ! herkxBatched function hipblasCherkxBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasCherkxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherkxBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCherkxBatchedFortran = & hipblasCherkxBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasCherkxBatchedFortran function hipblasZherkxBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasZherkxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherkxBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZherkxBatchedFortran = & hipblasZherkxBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasZherkxBatchedFortran ! 
herkxStridedBatched function hipblasCherkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCherkxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherkxStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCherkxStridedBatchedFortran = & hipblasCherkxStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasCherkxStridedBatchedFortran function hipblasZherkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZherkxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZherkxStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZherkxStridedBatchedFortran = & hipblasZherkxStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasZherkxStridedBatchedFortran ! 
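    ! symm below computes C = alpha*A*B + beta*C (side = left) or
    ! C = alpha*B*A + beta*C (side = right), where A is a symmetric matrix
    ! referenced through its uplo triangle; all four precisions (S, D, C, Z)
    ! are provided.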
symm function hipblasSsymmFortran(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasSsymmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsymmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasSsymmFortran = & hipblasSsymm(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasSsymmFortran function hipblasDsymmFortran(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasDsymmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsymmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasDsymmFortran = & hipblasDsymm(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasDsymmFortran function hipblasCsymmFortran(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasCsymmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsymmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCsymmFortran = & hipblasCsymm(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasCsymmFortran function hipblasZsymmFortran(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasZsymmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsymmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZsymmFortran = & hipblasZsymm(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasZsymmFortran ! 
symmBatched function hipblasSsymmBatchedFortran(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasSsymmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsymmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasSsymmBatchedFortran = & hipblasSsymmBatched(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasSsymmBatchedFortran function hipblasDsymmBatchedFortran(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasDsymmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsymmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasDsymmBatchedFortran = & hipblasDsymmBatched(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasDsymmBatchedFortran function hipblasCsymmBatchedFortran(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasCsymmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsymmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCsymmBatchedFortran = & hipblasCsymmBatched(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasCsymmBatchedFortran function hipblasZsymmBatchedFortran(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasZsymmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsymmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZsymmBatchedFortran = & hipblasZsymmBatched(handle, side, uplo, m, n, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasZsymmBatchedFortran ! 
symmStridedBatched function hipblasSsymmStridedBatchedFortran(handle, side, uplo, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasSsymmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsymmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasSsymmStridedBatchedFortran = & hipblasSsymmStridedBatched(handle, side, uplo, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasSsymmStridedBatchedFortran function hipblasDsymmStridedBatchedFortran(handle, side, uplo, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasDsymmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsymmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasDsymmStridedBatchedFortran = & hipblasDsymmStridedBatched(handle, side, uplo, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasDsymmStridedBatchedFortran function hipblasCsymmStridedBatchedFortran(handle, side, uplo, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCsymmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsymmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCsymmStridedBatchedFortran = & hipblasCsymmStridedBatched(handle, side, uplo, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasCsymmStridedBatchedFortran function hipblasZsymmStridedBatchedFortran(handle, side, uplo, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZsymmStridedBatchedFortran') use iso_c_binding use hipblas_enums 
implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsymmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZsymmStridedBatchedFortran = & hipblasZsymmStridedBatched(handle, side, uplo, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasZsymmStridedBatchedFortran ! syrk function hipblasSsyrkFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) & bind(c, name='hipblasSsyrkFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrkFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasSsyrkFortran = & hipblasSsyrk(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) end function hipblasSsyrkFortran function hipblasDsyrkFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) & bind(c, name='hipblasDsyrkFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrkFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasDsyrkFortran = & hipblasDsyrk(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) end function hipblasDsyrkFortran function hipblasCsyrkFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) & bind(c, name='hipblasCsyrkFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrkFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCsyrkFortran = & hipblasCsyrk(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) end function hipblasCsyrkFortran function hipblasZsyrkFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) & bind(c, name='hipblasZsyrkFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrkFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), 
value :: ldc hipblasZsyrkFortran = & hipblasZsyrk(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc) end function hipblasZsyrkFortran ! syrkBatched function hipblasSsyrkBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) & bind(c, name='hipblasSsyrkBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrkBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasSsyrkBatchedFortran = & hipblasSsyrkBatched(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) end function hipblasSsyrkBatchedFortran function hipblasDsyrkBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) & bind(c, name='hipblasDsyrkBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrkBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasDsyrkBatchedFortran = & hipblasDsyrkBatched(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) end function hipblasDsyrkBatchedFortran function hipblasCsyrkBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) & bind(c, name='hipblasCsyrkBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrkBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCsyrkBatchedFortran = & hipblasCsyrkBatched(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) end function hipblasCsyrkBatchedFortran function hipblasZsyrkBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) & bind(c, name='hipblasZsyrkBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrkBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZsyrkBatchedFortran = & hipblasZsyrkBatched(handle, uplo, transA, n, k, alpha, & A, lda, beta, C, ldc, batch_count) end function hipblasZsyrkBatchedFortran ! 
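    ! The syrk wrappers above perform the symmetric rank-k update
    ! C = alpha*op(A)*op(A)**T + beta*C on the uplo triangle of C; the
    ! strided-batched form follows.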
syrkStridedBatched function hipblasSsyrkStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasSsyrkStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrkStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasSsyrkStridedBatchedFortran = & hipblasSsyrkStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) end function hipblasSsyrkStridedBatchedFortran function hipblasDsyrkStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasDsyrkStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrkStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasDsyrkStridedBatchedFortran = & hipblasDsyrkStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) end function hipblasDsyrkStridedBatchedFortran function hipblasCsyrkStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCsyrkStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrkStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCsyrkStridedBatchedFortran = & hipblasCsyrkStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) end function hipblasCsyrkStridedBatchedFortran function hipblasZsyrkStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZsyrkStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrkStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A 
type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZsyrkStridedBatchedFortran = & hipblasZsyrkStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, beta, C, ldc, stride_C, batch_count) end function hipblasZsyrkStridedBatchedFortran ! syr2k function hipblasSsyr2kFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasSsyr2kFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyr2kFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasSsyr2kFortran = & hipblasSsyr2k(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasSsyr2kFortran function hipblasDsyr2kFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasDsyr2kFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyr2kFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasDsyr2kFortran = & hipblasDsyr2k(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasDsyr2kFortran function hipblasCsyr2kFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasCsyr2kFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyr2kFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCsyr2kFortran = & hipblasCsyr2k(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasCsyr2kFortran function hipblasZsyr2kFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasZsyr2kFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyr2kFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZsyr2kFortran = & hipblasZsyr2k(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasZsyr2kFortran ! 
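    ! The syr2k wrappers above perform the symmetric rank-2k update
    ! C = alpha*op(A)*op(B)**T + alpha*op(B)*op(A)**T + beta*C, updating only
    ! the uplo triangle of the symmetric result C.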
syr2kBatched function hipblasSsyr2kBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasSsyr2kBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyr2kBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasSsyr2kBatchedFortran = & hipblasSsyr2kBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasSsyr2kBatchedFortran function hipblasDsyr2kBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasDsyr2kBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyr2kBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasDsyr2kBatchedFortran = & hipblasDsyr2kBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasDsyr2kBatchedFortran function hipblasCsyr2kBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasCsyr2kBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyr2kBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCsyr2kBatchedFortran = & hipblasCsyr2kBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasCsyr2kBatchedFortran function hipblasZsyr2kBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasZsyr2kBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyr2kBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZsyr2kBatchedFortran = & hipblasZsyr2kBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function 
hipblasZsyr2kBatchedFortran ! syr2kStridedBatched function hipblasSsyr2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasSsyr2kStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyr2kStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasSsyr2kStridedBatchedFortran = & hipblasSsyr2kStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasSsyr2kStridedBatchedFortran function hipblasDsyr2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasDsyr2kStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyr2kStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasDsyr2kStridedBatchedFortran = & hipblasDsyr2kStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasDsyr2kStridedBatchedFortran function hipblasCsyr2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCsyr2kStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyr2kStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCsyr2kStridedBatchedFortran = & hipblasCsyr2kStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasCsyr2kStridedBatchedFortran function hipblasZsyr2kStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, 
name='hipblasZsyr2kStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyr2kStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZsyr2kStridedBatchedFortran = & hipblasZsyr2kStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasZsyr2kStridedBatchedFortran ! syrkx function hipblasSsyrkxFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasSsyrkxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrkxFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasSsyrkxFortran = & hipblasSsyrkx(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasSsyrkxFortran function hipblasDsyrkxFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasDsyrkxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrkxFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasDsyrkxFortran = & hipblasDsyrkx(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasDsyrkxFortran function hipblasCsyrkxFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasCsyrkxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrkxFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCsyrkxFortran = & hipblasCsyrkx(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasCsyrkxFortran function hipblasZsyrkxFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasZsyrkxFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrkxFortran 
type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZsyrkxFortran = & hipblasZsyrkx(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasZsyrkxFortran ! syrkxBatched function hipblasSsyrkxBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasSsyrkxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrkxBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasSsyrkxBatchedFortran = & hipblasSsyrkxBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasSsyrkxBatchedFortran function hipblasDsyrkxBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasDsyrkxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrkxBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasDsyrkxBatchedFortran = & hipblasDsyrkxBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasDsyrkxBatchedFortran function hipblasCsyrkxBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasCsyrkxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrkxBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCsyrkxBatchedFortran = & hipblasCsyrkxBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasCsyrkxBatchedFortran function hipblasZsyrkxBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasZsyrkxBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrkxBatchedFortran type(c_ptr), value :: handle 
integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZsyrkxBatchedFortran = & hipblasZsyrkxBatched(handle, uplo, transA, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasZsyrkxBatchedFortran ! syrkxStridedBatched function hipblasSsyrkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasSsyrkxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSsyrkxStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasSsyrkxStridedBatchedFortran = & hipblasSsyrkxStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasSsyrkxStridedBatchedFortran function hipblasDsyrkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasDsyrkxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDsyrkxStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasDsyrkxStridedBatchedFortran = & hipblasDsyrkxStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasDsyrkxStridedBatchedFortran function hipblasCsyrkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCsyrkxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrkxStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: 
beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCsyrkxStridedBatchedFortran = & hipblasCsyrkxStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasCsyrkxStridedBatchedFortran function hipblasZsyrkxStridedBatchedFortran(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZsyrkxStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZsyrkxStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZsyrkxStridedBatchedFortran = & hipblasZsyrkxStridedBatched(handle, uplo, transA, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasZsyrkxStridedBatchedFortran ! trmm function hipblasStrmmFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) & bind(c, name='hipblasStrmmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrmmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasStrmmFortran = & hipblasStrmm(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) end function hipblasStrmmFortran function hipblasDtrmmFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) & bind(c, name='hipblasDtrmmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrmmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasDtrmmFortran = & hipblasDtrmm(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) end function hipblasDtrmmFortran function hipblasCtrmmFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) & bind(c, name='hipblasCtrmmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrmmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m 
integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasCtrmmFortran = & hipblasCtrmm(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) end function hipblasCtrmmFortran function hipblasZtrmmFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) & bind(c, name='hipblasZtrmmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrmmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasZtrmmFortran = & hipblasZtrmm(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) end function hipblasZtrmmFortran ! trmmBatched function hipblasStrmmBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) & bind(c, name='hipblasStrmmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrmmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: batch_count hipblasStrmmBatchedFortran = & hipblasStrmmBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) end function hipblasStrmmBatchedFortran function hipblasDtrmmBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) & bind(c, name='hipblasDtrmmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrmmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: batch_count hipblasDtrmmBatchedFortran = & hipblasDtrmmBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) end function hipblasDtrmmBatchedFortran function hipblasCtrmmBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) & bind(c, name='hipblasCtrmmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrmmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B 
integer(c_int), value :: ldb integer(c_int), value :: batch_count hipblasCtrmmBatchedFortran = & hipblasCtrmmBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) end function hipblasCtrmmBatchedFortran function hipblasZtrmmBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) & bind(c, name='hipblasZtrmmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrmmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: batch_count hipblasZtrmmBatchedFortran = & hipblasZtrmmBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) end function hipblasZtrmmBatchedFortran ! trmmStridedBatched function hipblasStrmmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) & bind(c, name='hipblasStrmmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrmmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count hipblasStrmmStridedBatchedFortran = & hipblasStrmmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) end function hipblasStrmmStridedBatchedFortran function hipblasDtrmmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) & bind(c, name='hipblasDtrmmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrmmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count hipblasDtrmmStridedBatchedFortran = & hipblasDtrmmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) end function hipblasDtrmmStridedBatchedFortran function hipblasCtrmmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) & bind(c, name='hipblasCtrmmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: 
hipblasCtrmmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count hipblasCtrmmStridedBatchedFortran = & hipblasCtrmmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) end function hipblasCtrmmStridedBatchedFortran function hipblasZtrmmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) & bind(c, name='hipblasZtrmmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrmmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count hipblasZtrmmStridedBatchedFortran = & hipblasZtrmmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) end function hipblasZtrmmStridedBatchedFortran ! 
trtri function hipblasStrtriFortran(handle, uplo, diag, n, & A, lda, invA, ldinvA) & bind(c, name='hipblasStrtriFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrtriFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: invA integer(c_int), value :: ldinvA hipblasStrtriFortran = & hipblasStrtri(handle, uplo, diag, n, & A, lda, invA, ldinvA) end function hipblasStrtriFortran function hipblasDtrtriFortran(handle, uplo, diag, n, & A, lda, invA, ldinvA) & bind(c, name='hipblasDtrtriFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrtriFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: invA integer(c_int), value :: ldinvA hipblasDtrtriFortran = & hipblasDtrtri(handle, uplo, diag, n, & A, lda, invA, ldinvA) end function hipblasDtrtriFortran function hipblasCtrtriFortran(handle, uplo, diag, n, & A, lda, invA, ldinvA) & bind(c, name='hipblasCtrtriFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrtriFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: invA integer(c_int), value :: ldinvA hipblasCtrtriFortran = & hipblasCtrtri(handle, uplo, diag, n, & A, lda, invA, ldinvA) end function hipblasCtrtriFortran function hipblasZtrtriFortran(handle, uplo, diag, n, & A, lda, invA, ldinvA) & bind(c, name='hipblasZtrtriFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrtriFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: invA integer(c_int), value :: ldinvA hipblasZtrtriFortran = & hipblasZtrtri(handle, uplo, diag, n, & A, lda, invA, ldinvA) end function hipblasZtrtriFortran ! 
trtriBatched function hipblasStrtriBatchedFortran(handle, uplo, diag, n, & A, lda, invA, ldinvA, batch_count) & bind(c, name='hipblasStrtriBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrtriBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: invA integer(c_int), value :: ldinvA integer(c_int), value :: batch_count hipblasStrtriBatchedFortran = & hipblasStrtriBatched(handle, uplo, diag, n, & A, lda, invA, ldinvA, batch_count) end function hipblasStrtriBatchedFortran function hipblasDtrtriBatchedFortran(handle, uplo, diag, n, & A, lda, invA, ldinvA, batch_count) & bind(c, name='hipblasDtrtriBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrtriBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: invA integer(c_int), value :: ldinvA integer(c_int), value :: batch_count hipblasDtrtriBatchedFortran = & hipblasDtrtriBatched(handle, uplo, diag, n, & A, lda, invA, ldinvA, batch_count) end function hipblasDtrtriBatchedFortran function hipblasCtrtriBatchedFortran(handle, uplo, diag, n, & A, lda, invA, ldinvA, batch_count) & bind(c, name='hipblasCtrtriBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrtriBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: invA integer(c_int), value :: ldinvA integer(c_int), value :: batch_count hipblasCtrtriBatchedFortran = & hipblasCtrtriBatched(handle, uplo, diag, n, & A, lda, invA, ldinvA, batch_count) end function hipblasCtrtriBatchedFortran function hipblasZtrtriBatchedFortran(handle, uplo, diag, n, & A, lda, invA, ldinvA, batch_count) & bind(c, name='hipblasZtrtriBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrtriBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: invA integer(c_int), value :: ldinvA integer(c_int), value :: batch_count hipblasZtrtriBatchedFortran = & hipblasZtrtriBatched(handle, uplo, diag, n, & A, lda, invA, ldinvA, batch_count) end function hipblasZtrtriBatchedFortran ! 
trtriStridedBatched function hipblasStrtriStridedBatchedFortran(handle, uplo, diag, n, & A, lda, stride_A, invA, ldinvA, stride_invA, batch_count) & bind(c, name='hipblasStrtriStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrtriStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: invA integer(c_int), value :: ldinvA integer(c_int64_t), value :: stride_invA integer(c_int), value :: batch_count hipblasStrtriStridedBatchedFortran = & hipblasStrtriStridedBatched(handle, uplo, diag, n, & A, lda, stride_A, invA, ldinvA, stride_invA, batch_count) end function hipblasStrtriStridedBatchedFortran function hipblasDtrtriStridedBatchedFortran(handle, uplo, diag, n, & A, lda, stride_A, invA, ldinvA, stride_invA, batch_count) & bind(c, name='hipblasDtrtriStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrtriStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: invA integer(c_int), value :: ldinvA integer(c_int64_t), value :: stride_invA integer(c_int), value :: batch_count hipblasDtrtriStridedBatchedFortran = & hipblasDtrtriStridedBatched(handle, uplo, diag, n, & A, lda, stride_A, invA, ldinvA, stride_invA, batch_count) end function hipblasDtrtriStridedBatchedFortran function hipblasCtrtriStridedBatchedFortran(handle, uplo, diag, n, & A, lda, stride_A, invA, ldinvA, stride_invA, batch_count) & bind(c, name='hipblasCtrtriStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrtriStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: invA integer(c_int), value :: ldinvA integer(c_int64_t), value :: stride_invA integer(c_int), value :: batch_count hipblasCtrtriStridedBatchedFortran = & hipblasCtrtriStridedBatched(handle, uplo, diag, n, & A, lda, stride_A, invA, ldinvA, stride_invA, batch_count) end function hipblasCtrtriStridedBatchedFortran function hipblasZtrtriStridedBatchedFortran(handle, uplo, diag, n, & A, lda, stride_A, invA, ldinvA, stride_invA, batch_count) & bind(c, name='hipblasZtrtriStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrtriStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: invA integer(c_int), value :: ldinvA integer(c_int64_t), value :: stride_invA integer(c_int), value :: batch_count hipblasZtrtriStridedBatchedFortran = & hipblasZtrtriStridedBatched(handle, uplo, diag, n, & A, lda, stride_A, invA, ldinvA, stride_invA, batch_count) end function 
hipblasZtrtriStridedBatchedFortran ! trsm function hipblasStrsmFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) & bind(c, name='hipblasStrsmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrsmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasStrsmFortran = & hipblasStrsm(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) end function hipblasStrsmFortran function hipblasDtrsmFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) & bind(c, name='hipblasDtrsmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrsmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasDtrsmFortran = & hipblasDtrsm(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) end function hipblasDtrsmFortran function hipblasCtrsmFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) & bind(c, name='hipblasCtrsmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrsmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasCtrsmFortran = & hipblasCtrsm(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) end function hipblasCtrsmFortran function hipblasZtrsmFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) & bind(c, name='hipblasZtrsmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrsmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb hipblasZtrsmFortran = & hipblasZtrsm(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb) end function hipblasZtrsmFortran ! 
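        ! ------------------------------------------------------------------
        ! Illustrative note (not part of the original bindings): each
        ! *Fortran wrapper above forwards its arguments unchanged to the
        ! corresponding hipBLAS C entry point and re-exports it under a
        ! bind(c) name with a "Fortran" suffix, presumably so the test
        ! clients can exercise the Fortran interface. A minimal call sketch,
        ! assuming `handle`, `dA`, `dB` are a valid hipBLAS handle and
        ! device pointers, and `m`, `n`, `lda`, `ldb` are set elsewhere:
        !
        !   integer(kind(HIPBLAS_STATUS_SUCCESS)) :: status
        !   real(c_float), target :: alpha
        !   alpha  = 1.0
        !   status = hipblasStrsmFortran(handle, HIPBLAS_SIDE_LEFT, &
        !            HIPBLAS_FILL_MODE_LOWER, HIPBLAS_OP_N, HIPBLAS_DIAG_NON_UNIT, &
        !            m, n, c_loc(alpha), dA, lda, dB, ldb)
        ! ------------------------------------------------------------------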
trsmBatched function hipblasStrsmBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) & bind(c, name='hipblasStrsmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrsmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: batch_count hipblasStrsmBatchedFortran = & hipblasStrsmBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) end function hipblasStrsmBatchedFortran function hipblasDtrsmBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) & bind(c, name='hipblasDtrsmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrsmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: batch_count hipblasDtrsmBatchedFortran = & hipblasDtrsmBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) end function hipblasDtrsmBatchedFortran function hipblasCtrsmBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) & bind(c, name='hipblasCtrsmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrsmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: batch_count hipblasCtrsmBatchedFortran = & hipblasCtrsmBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) end function hipblasCtrsmBatchedFortran function hipblasZtrsmBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) & bind(c, name='hipblasZtrsmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrsmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: batch_count hipblasZtrsmBatchedFortran = & hipblasZtrsmBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count) end 
function hipblasZtrsmBatchedFortran ! trsmStridedBatched function hipblasStrsmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) & bind(c, name='hipblasStrsmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasStrsmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count hipblasStrsmStridedBatchedFortran = & hipblasStrsmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) end function hipblasStrsmStridedBatchedFortran function hipblasDtrsmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) & bind(c, name='hipblasDtrsmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDtrsmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count hipblasDtrsmStridedBatchedFortran = & hipblasDtrsmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) end function hipblasDtrsmStridedBatchedFortran function hipblasCtrsmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) & bind(c, name='hipblasCtrsmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCtrsmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count hipblasCtrsmStridedBatchedFortran = & hipblasCtrsmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) end function hipblasCtrsmStridedBatchedFortran function hipblasZtrsmStridedBatchedFortran(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) & bind(c, name='hipblasZtrsmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZtrsmStridedBatchedFortran type(c_ptr), value :: 
handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count hipblasZtrsmStridedBatchedFortran = & hipblasZtrsmStridedBatched(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count) end function hipblasZtrsmStridedBatchedFortran ! gemm function hipblasHgemmFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasHgemmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHgemmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasHgemmFortran = & hipblasHgemm(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasHgemmFortran function hipblasSgemmFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasSgemmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgemmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasSgemmFortran = & hipblasSgemm(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasSgemmFortran function hipblasDgemmFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasDgemmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgemmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasDgemmFortran = & hipblasDgemm(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasDgemmFortran function hipblasCgemmFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasCgemmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgemmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value 
:: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCgemmFortran = & hipblasCgemm(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasCgemmFortran function hipblasZgemmFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) & bind(c, name='hipblasZgemmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgemmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZgemmFortran = & hipblasZgemm(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc) end function hipblasZgemmFortran ! gemmBatched function hipblasHgemmBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasHgemmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHgemmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasHgemmBatchedFortran = & hipblasHgemmBatched(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasHgemmBatchedFortran function hipblasSgemmBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasSgemmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgemmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasSgemmBatchedFortran = & hipblasSgemmBatched(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasSgemmBatchedFortran function hipblasDgemmBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasDgemmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgemmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), 
value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasDgemmBatchedFortran = & hipblasDgemmBatched(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasDgemmBatchedFortran function hipblasCgemmBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasCgemmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgemmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCgemmBatchedFortran = & hipblasCgemmBatched(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasCgemmBatchedFortran function hipblasZgemmBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) & bind(c, name='hipblasZgemmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgemmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZgemmBatchedFortran = & hipblasZgemmBatched(handle, transA, transB, m, n, k, alpha, & A, lda, B, ldb, beta, C, ldc, batch_count) end function hipblasZgemmBatchedFortran ! 
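        ! ------------------------------------------------------------------
        ! Illustrative note (an assumption spelled out here, not stated in
        ! this file): for the *Batched wrappers above, the A, B and C
        ! arguments are passed straight through as type(c_ptr) and are
        ! expected to reference device arrays of per-matrix device pointers
        ! (one entry per batch), while the *StridedBatched variants that
        ! follow take a single base pointer plus an element stride between
        ! consecutive matrices.
        ! ------------------------------------------------------------------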
gemmStridedBatched function hipblasHgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasHgemmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasHgemmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasHgemmStridedBatchedFortran = & hipblasHgemmStridedBatched(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasHgemmStridedBatchedFortran function hipblasSgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasSgemmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgemmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasSgemmStridedBatchedFortran = & hipblasSgemmStridedBatched(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasSgemmStridedBatchedFortran function hipblasDgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasDgemmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgemmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasDgemmStridedBatchedFortran = & hipblasDgemmStridedBatched(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasDgemmStridedBatchedFortran function hipblasCgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, 
batch_count) & bind(c, name='hipblasCgemmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgemmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCgemmStridedBatchedFortran = & hipblasCgemmStridedBatched(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasCgemmStridedBatchedFortran function hipblasZgemmStridedBatchedFortran(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZgemmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgemmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: beta type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZgemmStridedBatchedFortran = & hipblasZgemmStridedBatched(handle, transA, transB, m, n, k, alpha, & A, lda, stride_A, B, ldb, stride_B, beta, C, ldc, stride_C, batch_count) end function hipblasZgemmStridedBatchedFortran ! 
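In the stridedBatched variants above, stride_A, stride_B and stride_C are element offsets between consecutive problems inside one contiguous allocation: problem i reads its A at A + i*stride_A. A minimal hedged C++ sketch of the corresponding hipblasSgemmStridedBatched call (names and sizes are illustrative, no error checking):

#include <hipblas.h>
#include <hip/hip_runtime_api.h>

void example_sgemm_strided_batched(int m, int n, int k, int batch_count)
{
    hipblasHandle_t handle;
    hipblasCreate(&handle);

    // Column-major, non-transposed: per problem A is lda x k, B is ldb x n, C is ldc x n.
    int lda = m, ldb = k, ldc = m;
    hipblasStride strideA = hipblasStride(lda) * k; // elements between consecutive A matrices
    hipblasStride strideB = hipblasStride(ldb) * n;
    hipblasStride strideC = hipblasStride(ldc) * n;

    float *dA, *dB, *dC; // one allocation holds all problems back to back
    hipMalloc((void**)&dA, sizeof(float) * strideA * batch_count);
    hipMalloc((void**)&dB, sizeof(float) * strideB * batch_count);
    hipMalloc((void**)&dC, sizeof(float) * strideC * batch_count);

    const float alpha = 1.0f, beta = 0.0f;
    hipblasSgemmStridedBatched(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, m, n, k, &alpha,
                               dA, lda, strideA, dB, ldb, strideB, &beta,
                               dC, ldc, strideC, batch_count);

    hipFree(dA); hipFree(dB); hipFree(dC);
    hipblasDestroy(handle);
}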
dgmm function hipblasSdgmmFortran(handle, side, m, n, & A, lda, x, incx, C, ldc) & bind(c, name='hipblasSdgmmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSdgmmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: C integer(c_int), value :: ldc hipblasSdgmmFortran = & hipblasSdgmm(handle, side, m, n, & A, lda, x, incx, C, ldc) end function hipblasSdgmmFortran function hipblasDdgmmFortran(handle, side, m, n, & A, lda, x, incx, C, ldc) & bind(c, name='hipblasDdgmmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDdgmmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: C integer(c_int), value :: ldc hipblasDdgmmFortran = & hipblasDdgmm(handle, side, m, n, & A, lda, x, incx, C, ldc) end function hipblasDdgmmFortran function hipblasCdgmmFortran(handle, side, m, n, & A, lda, x, incx, C, ldc) & bind(c, name='hipblasCdgmmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdgmmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCdgmmFortran = & hipblasCdgmm(handle, side, m, n, & A, lda, x, incx, C, ldc) end function hipblasCdgmmFortran function hipblasZdgmmFortran(handle, side, m, n, & A, lda, x, incx, C, ldc) & bind(c, name='hipblasZdgmmFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdgmmFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZdgmmFortran = & hipblasZdgmm(handle, side, m, n, & A, lda, x, incx, C, ldc) end function hipblasZdgmmFortran ! 
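dgmm scales the columns (side = HIPBLAS_SIDE_RIGHT) or rows (side = HIPBLAS_SIDE_LEFT) of A by the entries of the vector x, i.e. C = A*diag(x) or C = diag(x)*A. To make the math concrete, here is a small host-side C++ reference for the column-scaling case; it is purely illustrative (column-major indexing, incx > 0 assumed) and not part of the library:

#include <cstddef>

// Reference for C = A * diag(x) with side == HIPBLAS_SIDE_RIGHT:
// column j of C is column j of A scaled by x[j * incx].
void dgmm_right_reference(int m, int n, const float* A, int lda,
                          const float* x, int incx, float* C, int ldc)
{
    for(int j = 0; j < n; ++j)
        for(int i = 0; i < m; ++i)
            C[i + std::size_t(j) * ldc]
                = A[i + std::size_t(j) * lda] * x[std::size_t(j) * incx];
}

With side = HIPBLAS_SIDE_LEFT the scale factor is x[i * incx] instead, so each row i of A is scaled.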
dgmmBatched function hipblasSdgmmBatchedFortran(handle, side, m, n, & A, lda, x, incx, C, ldc, batch_count) & bind(c, name='hipblasSdgmmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSdgmmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasSdgmmBatchedFortran = & hipblasSdgmmBatched(handle, side, m, n, & A, lda, x, incx, C, ldc, batch_count) end function hipblasSdgmmBatchedFortran function hipblasDdgmmBatchedFortran(handle, side, m, n, & A, lda, x, incx, C, ldc, batch_count) & bind(c, name='hipblasDdgmmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDdgmmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasDdgmmBatchedFortran = & hipblasDdgmmBatched(handle, side, m, n, & A, lda, x, incx, C, ldc, batch_count) end function hipblasDdgmmBatchedFortran function hipblasCdgmmBatchedFortran(handle, side, m, n, & A, lda, x, incx, C, ldc, batch_count) & bind(c, name='hipblasCdgmmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdgmmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCdgmmBatchedFortran = & hipblasCdgmmBatched(handle, side, m, n, & A, lda, x, incx, C, ldc, batch_count) end function hipblasCdgmmBatchedFortran function hipblasZdgmmBatchedFortran(handle, side, m, n, & A, lda, x, incx, C, ldc, batch_count) & bind(c, name='hipblasZdgmmBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdgmmBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: x integer(c_int), value :: incx type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZdgmmBatchedFortran = & hipblasZdgmmBatched(handle, side, m, n, & A, lda, x, incx, C, ldc, batch_count) end function hipblasZdgmmBatchedFortran ! 
dgmmStridedBatched function hipblasSdgmmStridedBatchedFortran(handle, side, m, n, & A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count) & bind(c, name='hipblasSdgmmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSdgmmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasSdgmmStridedBatchedFortran = & hipblasSdgmmStridedBatched(handle, side, m, n, & A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count) end function hipblasSdgmmStridedBatchedFortran function hipblasDdgmmStridedBatchedFortran(handle, side, m, n, & A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count) & bind(c, name='hipblasDdgmmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDdgmmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasDdgmmStridedBatchedFortran = & hipblasDdgmmStridedBatched(handle, side, m, n, & A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count) end function hipblasDdgmmStridedBatchedFortran function hipblasCdgmmStridedBatchedFortran(handle, side, m, n, & A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCdgmmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCdgmmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCdgmmStridedBatchedFortran = & hipblasCdgmmStridedBatched(handle, side, m, n, & A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count) end function hipblasCdgmmStridedBatchedFortran function hipblasZdgmmStridedBatchedFortran(handle, side, m, n, & A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZdgmmStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZdgmmStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: x integer(c_int), value :: incx integer(c_int64_t), value :: stride_x type(c_ptr), value :: C integer(c_int), value :: 
ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZdgmmStridedBatchedFortran = & hipblasZdgmmStridedBatched(handle, side, m, n, & A, lda, stride_A, x, incx, stride_x, C, ldc, stride_C, batch_count) end function hipblasZdgmmStridedBatchedFortran ! geam function hipblasSgeamFortran(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc) & bind(c, name='hipblasSgeamFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgeamFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: C integer(c_int), value :: ldc hipblasSgeamFortran = & hipblasSgeam(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc) end function hipblasSgeamFortran function hipblasDgeamFortran(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc) & bind(c, name='hipblasDgeamFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgeamFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: C integer(c_int), value :: ldc hipblasDgeamFortran = & hipblasDgeam(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc) end function hipblasDgeamFortran function hipblasCgeamFortran(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc) & bind(c, name='hipblasCgeamFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeamFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: C integer(c_int), value :: ldc hipblasCgeamFortran = & hipblasCgeam(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc) end function hipblasCgeamFortran function hipblasZgeamFortran(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc) & bind(c, name='hipblasZgeamFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeamFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: C integer(c_int), value :: ldc hipblasZgeamFortran = & hipblasZgeam(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc) end function hipblasZgeamFortran ! 
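geam computes C = alpha*op(A) + beta*op(B); with alpha = 1, beta = 0 and transA = HIPBLAS_OP_T it is a common idiom for an out-of-place transpose. The element-wise definition for the non-transposed case is spelled out in this small host-side C++ reference, which is illustrative only (column-major indexing) and not part of the library:

#include <cstddef>

// Reference for C = alpha*A + beta*B with transA == transB == HIPBLAS_OP_N.
void geam_nn_reference(int m, int n, float alpha, const float* A, int lda,
                       float beta, const float* B, int ldb, float* C, int ldc)
{
    for(int j = 0; j < n; ++j)
        for(int i = 0; i < m; ++i)
            C[i + std::size_t(j) * ldc] = alpha * A[i + std::size_t(j) * lda]
                                        + beta  * B[i + std::size_t(j) * ldb];
}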
geamBatched function hipblasSgeamBatchedFortran(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc, batch_count) & bind(c, name='hipblasSgeamBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgeamBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasSgeamBatchedFortran = & hipblasSgeamBatched(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc, batch_count) end function hipblasSgeamBatchedFortran function hipblasDgeamBatchedFortran(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc, batch_count) & bind(c, name='hipblasDgeamBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgeamBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasDgeamBatchedFortran = & hipblasDgeamBatched(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc, batch_count) end function hipblasDgeamBatchedFortran function hipblasCgeamBatchedFortran(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc, batch_count) & bind(c, name='hipblasCgeamBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeamBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasCgeamBatchedFortran = & hipblasCgeamBatched(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc, batch_count) end function hipblasCgeamBatchedFortran function hipblasZgeamBatchedFortran(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc, batch_count) & bind(c, name='hipblasZgeamBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeamBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int), value :: batch_count hipblasZgeamBatchedFortran = & hipblasZgeamBatched(handle, transA, transB, m, n, alpha, & A, lda, beta, B, ldb, C, ldc, batch_count) end function hipblasZgeamBatchedFortran ! 
geamStridedBatched function hipblasSgeamStridedBatchedFortran(handle, transA, transB, m, n, alpha, & A, lda, stride_A, beta, B, ldb, stride_B, C, ldc, stride_C, batch_count) & bind(c, name='hipblasSgeamStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgeamStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasSgeamStridedBatchedFortran = & hipblasSgeamStridedBatched(handle, transA, transB, m, n, alpha, & A, lda, stride_A, beta, B, ldb, stride_B, C, ldc, stride_C, batch_count) end function hipblasSgeamStridedBatchedFortran function hipblasDgeamStridedBatchedFortran(handle, transA, transB, m, n, alpha, & A, lda, stride_A, beta, B, ldb, stride_B, C, ldc, stride_C, batch_count) & bind(c, name='hipblasDgeamStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgeamStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasDgeamStridedBatchedFortran = & hipblasDgeamStridedBatched(handle, transA, transB, m, n, alpha, & A, lda, stride_A, beta, B, ldb, stride_B, C, ldc, stride_C, batch_count) end function hipblasDgeamStridedBatchedFortran function hipblasCgeamStridedBatchedFortran(handle, transA, transB, m, n, alpha, & A, lda, stride_A, beta, B, ldb, stride_B, C, ldc, stride_C, batch_count) & bind(c, name='hipblasCgeamStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeamStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasCgeamStridedBatchedFortran = & hipblasCgeamStridedBatched(handle, transA, transB, m, n, alpha, & A, lda, stride_A, beta, B, ldb, stride_B, C, ldc, stride_C, batch_count) end function hipblasCgeamStridedBatchedFortran function hipblasZgeamStridedBatchedFortran(handle, transA, transB, m, n, alpha, & A, lda, stride_A, beta, B, ldb, stride_B, C, ldc, stride_C, batch_count) & bind(c, name='hipblasZgeamStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit 
none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeamStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: beta type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B type(c_ptr), value :: C integer(c_int), value :: ldc integer(c_int64_t), value :: stride_C integer(c_int), value :: batch_count hipblasZgeamStridedBatchedFortran = & hipblasZgeamStridedBatched(handle, transA, transB, m, n, alpha, & A, lda, stride_A, beta, B, ldb, stride_B, C, ldc, stride_C, batch_count) end function hipblasZgeamStridedBatchedFortran !-----------------! ! blas Extensions ! !-----------------! ! gemmEx function hipblasGemmExFortran(handle, transA, transB, m, n, k, alpha, a, a_type, lda, & b, b_type, ldb, beta, c, c_type, ldc, & compute_type, algo, solution_index, flags) & bind(c, name='hipblasGemmExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasGemmExFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: a integer(kind(HIPBLAS_R_16F)), value :: a_type integer(c_int), value :: lda type(c_ptr), value :: b integer(kind(HIPBLAS_R_16F)), value :: b_type integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: c integer(kind(HIPBLAS_R_16F)), value :: c_type integer(c_int), value :: ldc integer(kind(HIPBLAS_R_16F)), value :: compute_type integer(kind(HIPBLAS_GEMM_DEFAULT)), value :: algo integer(c_int32_t), value :: solution_index ! No unsigned types in fortran. If larger values are needed ! we will need a workaround. integer(c_int32_t), value :: flags hipblasGemmExFortran = & hipblasGemmEx(handle, transA, transB, m, n, k, alpha, & a, a_type, lda, b, b_type, ldb, beta, c, c_type, ldc, & compute_type, algo, solution_index, flags) end function hipblasGemmExFortran function hipblasGemmBatchedExFortran(handle, transA, transB, m, n, k, alpha, a, a_type, lda, & b, b_type, ldb, beta, c, c_type, ldc, & batch_count, compute_type, algo, solution_index, flags) & bind(c, name='hipblasGemmBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasGemmBatchedExFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: a integer(kind(HIPBLAS_R_16F)), value :: a_type integer(c_int), value :: lda type(c_ptr), value :: b integer(kind(HIPBLAS_R_16F)), value :: b_type integer(c_int), value :: ldb type(c_ptr), value :: beta type(c_ptr), value :: c integer(kind(HIPBLAS_R_16F)), value :: c_type integer(c_int), value :: ldc integer(c_int), value :: batch_count integer(kind(HIPBLAS_R_16F)), value :: compute_type integer(kind(HIPBLAS_GEMM_DEFAULT)), value :: algo integer(c_int32_t), value :: solution_index ! No unsigned types in fortran. If larger values are needed ! we will need a workaround. 
integer(c_int32_t), value :: flags hipblasGemmBatchedExFortran = & hipblasGemmBatchedEx(handle, transA, transB, m, n, k, alpha, & a, a_type, lda, b, b_type, ldb, beta, c, c_type, ldc, & batch_count, compute_type, algo, solution_index, flags) end function hipblasGemmBatchedExFortran function hipblasGemmStridedBatchedExFortran(handle, transA, transB, m, n, k, alpha, a, a_type, lda, stride_a, & b, b_type, ldb, stride_b, beta, c, c_type, ldc, stride_c, & batch_count, compute_type, algo, solution_index, flags) & bind(c, name='hipblasGemmStridedBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasGemmStridedBatchedExFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_OP_N)), value :: transB integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: k type(c_ptr), value :: alpha type(c_ptr), value :: a integer(kind(HIPBLAS_R_16F)), value :: a_type integer(c_int), value :: lda integer(c_int64_t), value :: stride_a type(c_ptr), value :: b integer(kind(HIPBLAS_R_16F)), value :: b_type integer(c_int), value :: ldb integer(c_int64_t), value :: stride_b type(c_ptr), value :: beta type(c_ptr), value :: c integer(kind(HIPBLAS_R_16F)), value :: c_type integer(c_int), value :: ldc integer(c_int64_t), value :: stride_c integer(c_int), value :: batch_count integer(kind(HIPBLAS_R_16F)), value :: compute_type integer(kind(HIPBLAS_GEMM_DEFAULT)), value :: algo integer(c_int32_t), value :: solution_index ! No unsigned types in fortran. If larger values are needed ! we will need a workaround. integer(c_int32_t), value :: flags hipblasGemmStridedBatchedExFortran = & hipblasGemmStridedBatchedEx(handle, transA, transB, m, n, k, alpha, & a, a_type, lda, stride_a, b, b_type, ldb, stride_b, beta, c, c_type, ldc, stride_c, & batch_count, compute_type, algo, solution_index, flags) end function hipblasGemmStridedBatchedExFortran ! 
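The GemmEx wrappers expose mixed-precision GEMM: a_type/b_type/c_type describe the storage types of the matrices, compute_type selects the accumulation precision, and alpha/beta are supplied in the compute type. Below is a hedged C++ sketch of a common fp16-in / fp16-out / fp32-accumulate configuration; it assumes the C-level hipblasGemmEx signature without the solution_index/flags arguments carried by the Fortran interface above, and the data is only zero-filled so the call stays runnable without a real fp16 conversion step.

#include <hipblas.h>
#include <hip/hip_runtime_api.h>

void example_gemm_ex_fp16(int m, int n, int k)
{
    hipblasHandle_t handle;
    hipblasCreate(&handle);

    int lda = m, ldb = k, ldc = m;
    hipblasHalf *dA, *dB, *dC; // fp16 storage on the device
    hipMalloc((void**)&dA, sizeof(hipblasHalf) * size_t(lda) * k);
    hipMalloc((void**)&dB, sizeof(hipblasHalf) * size_t(ldb) * n);
    hipMalloc((void**)&dC, sizeof(hipblasHalf) * size_t(ldc) * n);
    hipMemset(dA, 0, sizeof(hipblasHalf) * size_t(lda) * k); // placeholder data
    hipMemset(dB, 0, sizeof(hipblasHalf) * size_t(ldb) * n);
    hipMemset(dC, 0, sizeof(hipblasHalf) * size_t(ldc) * n);

    // alpha/beta are given in the compute type (float for HIPBLAS_R_32F).
    const float alpha = 1.0f, beta = 0.0f;
    hipblasGemmEx(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, m, n, k, &alpha,
                  dA, HIPBLAS_R_16F, lda, dB, HIPBLAS_R_16F, ldb, &beta,
                  dC, HIPBLAS_R_16F, ldc, HIPBLAS_R_32F, HIPBLAS_GEMM_DEFAULT);

    hipFree(dA); hipFree(dB); hipFree(dC);
    hipblasDestroy(handle);
}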
trsmEx function hipblasTrsmExFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, & B, ldb, invA, invA_size, compute_type) & bind(c, name='hipblasTrsmExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasTrsmExFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_UPPER)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: invA integer(c_int), value :: invA_size integer(kind(HIPBLAS_R_16F)), value :: compute_type hipblasTrsmExFortran = & hipblasTrsmEx(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, invA, invA_size, compute_type) end function hipblasTrsmExFortran function hipblasTrsmBatchedExFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, & B, ldb, batch_count, invA, invA_size, compute_type) & bind(c, name='hipblasTrsmBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasTrsmBatchedExFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_UPPER)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: batch_count type(c_ptr), value :: invA integer(c_int), value :: invA_size integer(kind(HIPBLAS_R_16F)), value :: compute_type hipblasTrsmBatchedExFortran = & hipblasTrsmBatchedEx(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, B, ldb, batch_count, invA, invA_size, compute_type) end function hipblasTrsmBatchedExFortran function hipblasTrsmStridedBatchedExFortran(handle, side, uplo, transA, diag, m, n, alpha, A, lda, stride_A, & B, ldb, stride_B, batch_count, invA, invA_size, stride_invA, compute_type) & bind(c, name='hipblasTrsmStridedBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasTrsmStridedBatchedExFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_SIDE_LEFT)), value :: side integer(kind(HIPBLAS_FILL_MODE_UPPER)), value :: uplo integer(kind(HIPBLAS_OP_N)), value :: transA integer(kind(HIPBLAS_DIAG_UNIT)), value :: diag integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: alpha type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: stride_A type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: stride_B integer(c_int), value :: batch_count type(c_ptr), value :: invA integer(c_int), value :: invA_size integer(c_int64_t), value :: stride_invA integer(kind(HIPBLAS_R_16F)), value :: compute_type hipblasTrsmStridedBatchedExFortran = & hipblasTrsmStridedBatchedEx(handle, side, uplo, transA, diag, m, n, alpha, & A, lda, stride_A, B, ldb, stride_B, batch_count, invA, invA_size, stride_invA, compute_type) end function hipblasTrsmStridedBatchedExFortran ! 
AxpyEx function hipblasAxpyExFortran(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, executionType) & bind(c, name='hipblasAxpyExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasAxpyExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha integer(kind(HIPBLAS_R_16F)), value :: alphaType type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasAxpyExFortran = & hipblasAxpyEx(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, executionType) return end function hipblasAxpyExFortran function hipblasAxpyBatchedExFortran(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, batch_count, executionType) & bind(c, name='hipblasAxpyBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasAxpyBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha integer(kind(HIPBLAS_R_16F)), value :: alphaType type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy integer(c_int), value :: batch_count integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasAxpyBatchedExFortran = & hipblasAxpyBatchedEx(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, batch_count, executionType) return end function hipblasAxpyBatchedExFortran function hipblasAxpyStridedBatchedExFortran(handle, n, alpha, alphaType, x, xType, incx, stridex, & y, yType, incy, stridey, batch_count, executionType) & bind(c, name='hipblasAxpyStridedBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasAxpyStridedBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha integer(kind(HIPBLAS_R_16F)), value :: alphaType type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(c_int64_t), value :: stridex type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy integer(c_int64_t), value :: stridey integer(c_int), value :: batch_count integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasAxpyStridedBatchedExFortran = & hipblasAxpyStridedBatchedEx(handle, n, alpha, alphaType, x, xType, incx, stridex, & y, yType, incy, stridey, batch_count, executionType) return end function hipblasAxpyStridedBatchedExFortran ! 
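AxpyEx and the other *Ex level-1 routines use the same typing scheme as GemmEx: each buffer carries its own datatype enum and executionType fixes the precision of the arithmetic. A hedged C++ sketch of y = alpha*x + y with fp16 vectors and fp32 execution follows; the helper name and zero-filled placeholder data are illustrative assumptions, not library conventions.

#include <hipblas.h>
#include <hip/hip_runtime_api.h>

void example_axpy_ex_fp16(int n)
{
    hipblasHandle_t handle;
    hipblasCreate(&handle);

    hipblasHalf *dx, *dy; // fp16 vectors on the device
    hipMalloc((void**)&dx, sizeof(hipblasHalf) * size_t(n));
    hipMalloc((void**)&dy, sizeof(hipblasHalf) * size_t(n));
    hipMemset(dx, 0, sizeof(hipblasHalf) * size_t(n)); // placeholder data
    hipMemset(dy, 0, sizeof(hipblasHalf) * size_t(n));

    const float alpha = 2.0f; // supplied in fp32, matching alphaType/executionType
    hipblasAxpyEx(handle, n, &alpha, HIPBLAS_R_32F,
                  dx, HIPBLAS_R_16F, 1, dy, HIPBLAS_R_16F, 1, HIPBLAS_R_32F);

    hipFree(dx); hipFree(dy);
    hipblasDestroy(handle);
}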
DotEx function hipblasDotExFortran(handle, n, x, xType, incx, y, yType, incy, result, & resultType, executionType) & bind(c, name='hipblasDotExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDotExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasDotExFortran = & hipblasDotEx(handle, n, x, xType, incx, y, yType, incy, result, resultType, executionType) return end function hipblasDotExFortran function hipblasDotcExFortran(handle, n, x, xType, incx, y, yType, incy, result, & resultType, executionType) & bind(c, name='hipblasDotcExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDotcExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasDotcExFortran = & hipblasDotcEx(handle, n, x, xType, incx, y, yType, incy, result, resultType, executionType) return end function hipblasDotcExFortran function hipblasDotBatchedExFortran(handle, n, x, xType, incx, y, yType, incy, batch_count, result, & resultType, executionType) & bind(c, name='hipblasDotBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDotBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasDotBatchedExFortran = & hipblasDotBatchedEx(handle, n, x, xType, incx, y, yType, incy, batch_count, result, resultType, executionType) return end function hipblasDotBatchedExFortran function hipblasDotcBatchedExFortran(handle, n, x, xType, incx, y, yType, incy, batch_count, result, & resultType, executionType) & bind(c, name='hipblasDotcBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDotcBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy integer(c_int), value :: batch_count type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasDotcBatchedExFortran = & hipblasDotcBatchedEx(handle, n, x, xType, incx, y, yType, incy, batch_count, result, resultType, executionType) return end function hipblasDotcBatchedExFortran function hipblasDotStridedBatchedExFortran(handle, n, x, xType, incx, stridex, & y, yType, incy, stridey, batch_count, result, 
resultType, executionType) & bind(c, name='hipblasDotStridedBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDotStridedBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(c_int64_t), value :: stridex type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy integer(c_int64_t), value :: stridey integer(c_int), value :: batch_count type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasDotStridedBatchedExFortran = & hipblasDotStridedBatchedEx(handle, n, x, xType, incx, stridex, & y, yType, incy, stridey, batch_count, result, resultType, executionType) return end function hipblasDotStridedBatchedExFortran function hipblasDotcStridedBatchedExFortran(handle, n, x, xType, incx, stridex, & y, yType, incy, stridey, batch_count, result, resultType, executionType) & bind(c, name='hipblasDotcStridedBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDotcStridedBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(c_int64_t), value :: stridex type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy integer(c_int64_t), value :: stridey integer(c_int), value :: batch_count type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasDotcStridedBatchedExFortran = & hipblasDotcStridedBatchedEx(handle, n, x, xType, incx, stridex, & y, yType, incy, stridey, batch_count, result, resultType, executionType) return end function hipblasDotcStridedBatchedExFortran ! 
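For the DotEx/DotcEx families, result is written to host or device memory depending on the handle's pointer mode, and resultType sets its datatype independently of the input vectors. A minimal hedged sketch with fp32 vectors and a host-side result (illustrative helper name and zero-filled data, no error checking):

#include <hipblas.h>
#include <hip/hip_runtime_api.h>
#include <cstdio>

void example_dot_ex(int n)
{
    hipblasHandle_t handle;
    hipblasCreate(&handle);
    hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST); // result lands on the host

    float *dx, *dy;
    hipMalloc((void**)&dx, sizeof(float) * size_t(n));
    hipMalloc((void**)&dy, sizeof(float) * size_t(n));
    hipMemset(dx, 0, sizeof(float) * size_t(n)); // placeholder data
    hipMemset(dy, 0, sizeof(float) * size_t(n));

    float result = 0.0f;
    hipblasDotEx(handle, n, dx, HIPBLAS_R_32F, 1, dy, HIPBLAS_R_32F, 1,
                 &result, HIPBLAS_R_32F, HIPBLAS_R_32F);
    std::printf("dot = %f\n", result);

    hipFree(dx); hipFree(dy);
    hipblasDestroy(handle);
}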
Nrm2Ex function hipblasNrm2ExFortran(handle, n, x, xType, incx, result, resultType, executionType) & bind(c, name='hipblasNrm2ExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasNrm2ExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasNrm2ExFortran = & hipblasNrm2Ex(handle, n, x, xType, incx, result, resultType, executionType) return end function hipblasNrm2ExFortran function hipblasNrm2BatchedExFortran(handle, n, x, xType, incx, batch_count, result, resultType, executionType) & bind(c, name='hipblasNrm2BatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasNrm2BatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(c_int), value :: batch_count type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasNrm2BatchedExFortran = & hipblasNrm2BatchedEx(handle, n, x, xType, incx, batch_count, result, resultType, executionType) return end function hipblasNrm2BatchedExFortran function hipblasNrm2StridedBatchedExFortran(handle, n, x, xType, incx, stridex, & batch_count, result, resultType, executionType) & bind(c, name='hipblasNrm2StridedBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasNrm2StridedBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(c_int64_t), value :: stridex integer(c_int), value :: batch_count type(c_ptr), value :: result integer(kind(HIPBLAS_R_16F)), value :: resultType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasNrm2StridedBatchedExFortran = & hipblasNrm2StridedBatchedEx(handle, n, x, xType, incx, stridex, & batch_count, result, resultType, executionType) return end function hipblasNrm2StridedBatchedExFortran ! 
RotEx function hipblasRotExFortran(handle, n, x, xType, incx, y, yType, incy, c, s, & csType, executionType) & bind(c, name='hipblasRotExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasRotExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s integer(kind(HIPBLAS_R_16F)), value :: csType integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasRotExFortran = & hipblasRotEx(handle, n, x, xType, incx, y, yType, incy, c, s, csType, executionType) return end function hipblasRotExFortran function hipblasRotBatchedExFortran(handle, n, x, xType, incx, y, yType, incy, c, s, & csType, batch_count, executionType) & bind(c, name='hipblasRotBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasRotBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy type(c_ptr), value :: c type(c_ptr), value :: s integer(kind(HIPBLAS_R_16F)), value :: csType integer(c_int), value :: batch_count integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasRotBatchedExFortran = & hipblasRotBatchedEx(handle, n, x, xType, incx, y, yType, incy, c, s, csType, batch_count, executionType) return end function hipblasRotBatchedExFortran function hipblasRotStridedBatchedExFortran(handle, n, x, xType, incx, stridex, & y, yType, incy, stridey, c, s, csType, batch_count, executionType) & bind(c, name='hipblasRotStridedBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasRotStridedBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(c_int64_t), value :: stridex type(c_ptr), value :: y integer(kind(HIPBLAS_R_16F)), value :: yType integer(c_int), value :: incy integer(c_int64_t), value :: stridey type(c_ptr), value :: c type(c_ptr), value :: s integer(kind(HIPBLAS_R_16F)), value :: csType integer(c_int), value :: batch_count integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasRotStridedBatchedExFortran = & hipblasRotStridedBatchedEx(handle, n, x, xType, incx, stridex, & y, yType, incy, stridey, c, s, csType, batch_count, executionType) return end function hipblasRotStridedBatchedExFortran ! 
ScalEx function hipblasScalExFortran(handle, n, alpha, alphaType, x, xType, incx, executionType) & bind(c, name='hipblasScalExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScalExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha integer(kind(HIPBLAS_R_16F)), value :: alphaType type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasScalExFortran = & hipblasScalEx(handle, n, alpha, alphaType, x, xType, incx, executionType) return end function hipblasScalExFortran function hipblasScalBatchedExFortran(handle, n, alpha, alphaType, x, xType, incx, batch_count, executionType) & bind(c, name='hipblasScalBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScalBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha integer(kind(HIPBLAS_R_16F)), value :: alphaType type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(c_int), value :: batch_count integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasScalBatchedExFortran = & hipblasScalBatchedEx(handle, n, alpha, alphaType, x, xType, incx, batch_count, executionType) return end function hipblasScalBatchedExFortran function hipblasScalStridedBatchedExFortran(handle, n, alpha, alphaType, x, xType, incx, stridex, & batch_count, executionType) & bind(c, name='hipblasScalStridedBatchedExFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasScalStridedBatchedExFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: alpha integer(kind(HIPBLAS_R_16F)), value :: alphaType type(c_ptr), value :: x integer(kind(HIPBLAS_R_16F)), value :: xType integer(c_int), value :: incx integer(c_int64_t), value :: stridex integer(c_int), value :: batch_count integer(kind(HIPBLAS_R_16F)), value :: executionType hipblasScalStridedBatchedExFortran = & hipblasScalStridedBatchedEx(handle, n, alpha, alphaType, x, xType, incx, stridex, & batch_count, executionType) return end function hipblasScalStridedBatchedExFortran ! ! CsyrkEx ! function hipblasCsyrkExFortran(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc) & ! bind(c, name = 'hipblasCsyrkExFortran') ! use iso_c_binding ! use hipblas_enums ! implicit none ! integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCsyrkExFortran ! type(c_ptr), value :: handle ! integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo ! integer(kind(HIPBLAS_OP_N)), value :: trans ! integer(c_int), value :: n ! integer(c_int), value :: k ! type(c_ptr), value :: alpha ! type(c_ptr), value :: A ! integer(kind(HIPBLAS_R_16F)), value :: Atype ! integer(c_int), value :: lda ! type(c_ptr), value :: beta ! type(c_ptr), value :: C ! integer(kind(HIPBLAS_R_16F)), value :: Ctype ! integer(c_int), value :: ldc ! ! hipblasCsyrkExFortran = & ! hipblasCsyrkEx(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc) ! end function hipblasCsyrkExFortran ! ! CherkEx ! function hipblasCherkExFortran(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc) & ! bind(c, name = 'hipblasCherkExFortran') ! use iso_c_binding ! use hipblas_enums ! implicit none ! integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCherkExFortran ! type(c_ptr), value :: handle ! 
integer(kind(HIPBLAS_FILL_MODE_FULL)), value :: uplo ! integer(kind(HIPBLAS_OP_N)), value :: trans ! integer(c_int), value :: n ! integer(c_int), value :: k ! type(c_ptr), value :: alpha ! type(c_ptr), value :: A ! integer(kind(HIPBLAS_R_16F)), value :: Atype ! integer(c_int), value :: lda ! type(c_ptr), value :: beta ! type(c_ptr), value :: C ! integer(kind(HIPBLAS_R_16F)), value :: Ctype ! integer(c_int), value :: ldc ! ! hipblasCherkExFortran = & ! hipblasCherkEx(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc) ! end function hipblasCherkExFortran end module hipblas_interface hipBLAS-rocm-5.5.1/clients/include/hipblas_fortran.hpp000066400000000000000000016417401434647641600227100ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef _HIPBLAS_FORTRAN_HPP #define _HIPBLAS_FORTRAN_HPP /*!\file * This file interfaces with our Fortran BLAS interface. 
*/ /* * ============================================================================ * Fortran functions * ============================================================================ */ extern "C" { /* ========== * Aux * ========== */ hipblasStatus_t hipblasSetVectorFortran(int n, int elemSize, const void* x, int incx, void* y, int incy); hipblasStatus_t hipblasGetVectorFortran(int n, int elemSize, const void* x, int incx, void* y, int incy); hipblasStatus_t hipblasSetMatrixFortran( int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb); hipblasStatus_t hipblasGetMatrixFortran( int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb); hipblasStatus_t hipblasSetVectorAsyncFortran( int n, int elemSize, const void* x, int incx, void* y, int incy, hipStream_t stream); hipblasStatus_t hipblasGetVectorAsyncFortran( int n, int elemSize, const void* x, int incx, void* y, int incy, hipStream_t stream); hipblasStatus_t hipblasSetMatrixAsyncFortran( int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, hipStream_t stream); hipblasStatus_t hipblasGetMatrixAsyncFortran( int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, hipStream_t stream); hipblasStatus_t hipblasSetAtomicsModeFortran(hipblasHandle_t handle, hipblasAtomicsMode_t atomics_mode); hipblasStatus_t hipblasGetAtomicsModeFortran(hipblasHandle_t handle, hipblasAtomicsMode_t* atomics_mode); /* ========== * L1 * ========== */ // scal hipblasStatus_t hipblasSscalFortran(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx); hipblasStatus_t hipblasDscalFortran(hipblasHandle_t handle, int n, const double* alpha, double* x, int incx); hipblasStatus_t hipblasCscalFortran( hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx); hipblasStatus_t hipblasZscalFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx); hipblasStatus_t hipblasCsscalFortran( hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx); hipblasStatus_t hipblasZdscalFortran( hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx); // scalBatched hipblasStatus_t hipblasSscalBatchedFortran( hipblasHandle_t handle, int n, const float* alpha, float* const x[], int incx, int batch_count); hipblasStatus_t hipblasDscalBatchedFortran(hipblasHandle_t handle, int n, const double* alpha, double* const x[], int incx, int batch_count); hipblasStatus_t hipblasCscalBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* const x[], int incx, int batch_count); hipblasStatus_t hipblasZscalBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const x[], int incx, int batch_count); hipblasStatus_t hipblasCsscalBatchedFortran(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* const x[], int incx, int batch_count); hipblasStatus_t hipblasZdscalBatchedFortran(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* const x[], int incx, int batch_count); // scalStridedBatched hipblasStatus_t hipblasSscalStridedBatchedFortran(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasDscalStridedBatchedFortran(hipblasHandle_t handle, int n, const double* alpha, double* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t 
hipblasCscalStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasZscalStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasCsscalStridedBatchedFortran(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasZdscalStridedBatchedFortran(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, int batch_count); // copy hipblasStatus_t hipblasScopyFortran( hipblasHandle_t handle, int n, const float* x, int incx, float* y, int incy); hipblasStatus_t hipblasDcopyFortran( hipblasHandle_t handle, int n, const double* x, int incx, double* y, int incy); hipblasStatus_t hipblasCcopyFortran( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasComplex* y, int incy); hipblasStatus_t hipblasZcopyFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy); // copyBatched hipblasStatus_t hipblasScopyBatchedFortran(hipblasHandle_t handle, int n, const float* const x[], int incx, float* const y[], int incy, int batch_count); hipblasStatus_t hipblasDcopyBatchedFortran(hipblasHandle_t handle, int n, const double* const x[], int incx, double* const y[], int incy, int batch_count); hipblasStatus_t hipblasCcopyBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, int batch_count); hipblasStatus_t hipblasZcopyBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, int batch_count); // copyStridedBatched hipblasStatus_t hipblasScopyStridedBatchedFortran(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasDcopyStridedBatchedFortran(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasCcopyStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasZcopyStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count); // dot hipblasStatus_t hipblasSdotFortran(hipblasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result); hipblasStatus_t hipblasDdotFortran(hipblasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result); hipblasStatus_t hipblasHdotFortran(hipblasHandle_t handle, int n, const hipblasHalf* x, int incx, const hipblasHalf* y, int incy, hipblasHalf* result); hipblasStatus_t hipblasBfdotFortran(hipblasHandle_t handle, int n, const hipblasBfloat16* x, int incx, const hipblasBfloat16* y, int incy, hipblasBfloat16* result); hipblasStatus_t hipblasCdotuFortran(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result); hipblasStatus_t hipblasZdotuFortran(hipblasHandle_t 
handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result); hipblasStatus_t hipblasCdotcFortran(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result); hipblasStatus_t hipblasZdotcFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result); // dotBatched hipblasStatus_t hipblasSdotBatchedFortran(hipblasHandle_t handle, int n, const float* const x[], int incx, const float* const y[], int incy, int batch_count, float* result); hipblasStatus_t hipblasDdotBatchedFortran(hipblasHandle_t handle, int n, const double* const x[], int incx, const double* const y[], int incy, int batch_count, double* result); hipblasStatus_t hipblasHdotBatchedFortran(hipblasHandle_t handle, int n, const hipblasHalf* const x[], int incx, const hipblasHalf* const y[], int incy, int batch_count, hipblasHalf* result); hipblasStatus_t hipblasBfdotBatchedFortran(hipblasHandle_t handle, int n, const hipblasBfloat16* const x[], int incx, const hipblasBfloat16* const y[], int incy, int batch_count, hipblasBfloat16* result); hipblasStatus_t hipblasCdotuBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, int batch_count, hipblasComplex* result); hipblasStatus_t hipblasZdotuBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, int batch_count, hipblasDoubleComplex* result); hipblasStatus_t hipblasCdotcBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, int batch_count, hipblasComplex* result); hipblasStatus_t hipblasZdotcBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, int batch_count, hipblasDoubleComplex* result); // dotStridedBatched hipblasStatus_t hipblasSdotStridedBatchedFortran(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, int batch_count, float* result); hipblasStatus_t hipblasDdotStridedBatchedFortran(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, int batch_count, double* result); hipblasStatus_t hipblasHdotStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasHalf* x, int incx, hipblasStride stridex, const hipblasHalf* y, int incy, hipblasStride stridey, int batch_count, hipblasHalf* result); hipblasStatus_t hipblasBfdotStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasBfloat16* x, int incx, hipblasStride stridex, const hipblasBfloat16* y, int incy, hipblasStride stridey, int batch_count, hipblasBfloat16* result); hipblasStatus_t hipblasCdotuStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasComplex* result); hipblasStatus_t hipblasZdotuStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasDoubleComplex* result); hipblasStatus_t hipblasCdotcStridedBatchedFortran(hipblasHandle_t 
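/* Editor's note: illustrative addition, not part of the original header. In contrast to the
 * strided-batched form, the *BatchedFortran wrappers above take arrays of per-batch vector
 * pointers. The sketch below assumes the pointer arrays are device arrays of device pointers
 * (as with the corresponding hipBLAS C API), an already-created handle, the default host
 * pointer mode (so the dot results land in host memory), includes for <vector>, <hipblas.h>
 * and the HIP runtime, and no error checking:
 *
 *     int n = 256, batch_count = 10;
 *     std::vector<float*> hx(batch_count), hy(batch_count); // per-batch device pointers
 *     for(int b = 0; b < batch_count; ++b)
 *     {
 *         hipMalloc((void**)&hx[b], n * sizeof(float));
 *         hipMalloc((void**)&hy[b], n * sizeof(float));
 *     }
 *     float **dx, **dy;
 *     hipMalloc((void**)&dx, batch_count * sizeof(float*));
 *     hipMalloc((void**)&dy, batch_count * sizeof(float*));
 *     hipMemcpy(dx, hx.data(), batch_count * sizeof(float*), hipMemcpyHostToDevice);
 *     hipMemcpy(dy, hy.data(), batch_count * sizeof(float*), hipMemcpyHostToDevice);
 *     std::vector<float> result(batch_count);
 *     hipblasSdotBatchedFortran(handle, n, dx, 1, dy, 1, batch_count, result.data());
 */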
handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasComplex* result); hipblasStatus_t hipblasZdotcStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count, hipblasDoubleComplex* result); // swap hipblasStatus_t hipblasSswapFortran(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy); hipblasStatus_t hipblasDswapFortran(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy); hipblasStatus_t hipblasCswapFortran( hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy); hipblasStatus_t hipblasZswapFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy); // swapBatched hipblasStatus_t hipblasSswapBatchedFortran( hipblasHandle_t handle, int n, float* x[], int incx, float* y[], int incy, int batch_count); hipblasStatus_t hipblasDswapBatchedFortran( hipblasHandle_t handle, int n, double* x[], int incx, double* y[], int incy, int batch_count); hipblasStatus_t hipblasCswapBatchedFortran(hipblasHandle_t handle, int n, hipblasComplex* x[], int incx, hipblasComplex* y[], int incy, int batch_count); hipblasStatus_t hipblasZswapBatchedFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* x[], int incx, hipblasDoubleComplex* y[], int incy, int batch_count); // swapStridedBatched hipblasStatus_t hipblasSswapStridedBatchedFortran(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stridex, float* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasDswapStridedBatchedFortran(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stridex, double* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasCswapStridedBatchedFortran(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasZswapStridedBatchedFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count); // axpy hipblasStatus_t hipblasHaxpyFortran(hipblasHandle_t handle, const int N, const hipblasHalf* alpha, const hipblasHalf* x, const int incx, hipblasHalf* y, const int incy); hipblasStatus_t hipblasSaxpyFortran(hipblasHandle_t handle, const int N, const float* alpha, const float* x, const int incx, float* y, const int incy); hipblasStatus_t hipblasDaxpyFortran(hipblasHandle_t handle, const int N, const double* alpha, const double* x, const int incx, double* y, const int incy); hipblasStatus_t hipblasCaxpyFortran(hipblasHandle_t handle, const int N, const hipblasComplex* alpha, const hipblasComplex* x, const int incx, hipblasComplex* y, const int incy); hipblasStatus_t hipblasZaxpyFortran(hipblasHandle_t handle, const int N, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, const int incx, hipblasDoubleComplex* y, const int incy); // axpyBatched hipblasStatus_t hipblasHaxpyBatchedFortran(hipblasHandle_t handle, const int N, const hipblasHalf* alpha, const hipblasHalf* const x[], const int incx, hipblasHalf* const y[], const int incy, const int batch_count); hipblasStatus_t hipblasSaxpyBatchedFortran(hipblasHandle_t handle, const int N, const float* alpha, const float* const 
x[], const int incx, float* const y[], const int incy, const int batch_count); hipblasStatus_t hipblasDaxpyBatchedFortran(hipblasHandle_t handle, const int N, const double* alpha, const double* const x[], const int incx, double* const y[], const int incy, const int batch_count); hipblasStatus_t hipblasCaxpyBatchedFortran(hipblasHandle_t handle, const int N, const hipblasComplex* alpha, const hipblasComplex* const x[], const int incx, hipblasComplex* const y[], const int incy, const int batch_count); hipblasStatus_t hipblasZaxpyBatchedFortran(hipblasHandle_t handle, const int N, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], const int incx, hipblasDoubleComplex* const y[], const int incy, const int batch_count); // axpyStridedBatched hipblasStatus_t hipblasHaxpyStridedBatchedFortran(hipblasHandle_t handle, const int N, const hipblasHalf* alpha, const hipblasHalf* x, const int incx, const hipblasStride stride_x, hipblasHalf* y, const int incy, const hipblasStride stride_y, const int batch_count); hipblasStatus_t hipblasSaxpyStridedBatchedFortran(hipblasHandle_t handle, const int N, const float* alpha, const float* x, const int incx, const hipblasStride stride_x, float* y, const int incy, const hipblasStride stride_y, const int batch_count); hipblasStatus_t hipblasDaxpyStridedBatchedFortran(hipblasHandle_t handle, const int N, const double* alpha, const double* x, const int incx, const hipblasStride stride_x, double* y, const int incy, const hipblasStride stride_y, const int batch_count); hipblasStatus_t hipblasCaxpyStridedBatchedFortran(hipblasHandle_t handle, const int N, const hipblasComplex* alpha, const hipblasComplex* x, const int incx, const hipblasStride stride_x, hipblasComplex* y, const int incy, const hipblasStride stride_y, const int batch_count); hipblasStatus_t hipblasZaxpyStridedBatchedFortran(hipblasHandle_t handle, const int N, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, const int incx, const hipblasStride stride_x, hipblasDoubleComplex* y, const int incy, const hipblasStride stride_y, const int batch_count); // asum hipblasStatus_t hipblasSasumFortran(hipblasHandle_t handle, int n, const float* x, int incx, float* result); hipblasStatus_t hipblasDasumFortran(hipblasHandle_t handle, int n, const double* x, int incx, double* result); hipblasStatus_t hipblasScasumFortran( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result); hipblasStatus_t hipblasDzasumFortran( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result); // asumBatched hipblasStatus_t hipblasSasumBatchedFortran(hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, float* results); hipblasStatus_t hipblasDasumBatchedFortran(hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, double* results); hipblasStatus_t hipblasScasumBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, float* results); hipblasStatus_t hipblasDzasumBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, double* results); // asumStridedBatched hipblasStatus_t hipblasSasumStridedBatchedFortran(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int
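/* Editor's note: illustrative addition, not part of the original header. The non-batched
 * wrappers such as hipblasSaxpyFortran above are intended to behave like their hipBLAS C API
 * counterparts, here computing y = alpha * x + y on device data. Minimal end-to-end sketch,
 * assuming <vector>, <hipblas.h> and the HIP runtime headers are included, the default host
 * pointer mode, and no error checking:
 *
 *     int                n = 256;
 *     std::vector<float> hx(n, 1.0f), hy(n, 2.0f);
 *     float *dx, *dy;
 *     hipMalloc((void**)&dx, n * sizeof(float));
 *     hipMalloc((void**)&dy, n * sizeof(float));
 *     hipMemcpy(dx, hx.data(), n * sizeof(float), hipMemcpyHostToDevice);
 *     hipMemcpy(dy, hy.data(), n * sizeof(float), hipMemcpyHostToDevice);
 *     hipblasHandle_t handle;
 *     hipblasCreate(&handle);
 *     const float alpha = 2.0f; // host-side scalar under host pointer mode
 *     hipblasSaxpyFortran(handle, n, &alpha, dx, 1, dy, 1);
 *     hipMemcpy(hy.data(), dy, n * sizeof(float), hipMemcpyDeviceToHost);
 *     hipblasDestroy(handle);
 *     hipFree(dx);
 *     hipFree(dy);
 */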
batch_count, float* results); hipblasStatus_t hipblasDasumStridedBatchedFortran(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, double* results); hipblasStatus_t hipblasScasumStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, float* results); hipblasStatus_t hipblasDzasumStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, double* results); // nrm2 hipblasStatus_t hipblasSnrm2Fortran(hipblasHandle_t handle, int n, const float* x, int incx, float* result); hipblasStatus_t hipblasDnrm2Fortran(hipblasHandle_t handle, int n, const double* x, int incx, double* result); hipblasStatus_t hipblasScnrm2Fortran( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result); hipblasStatus_t hipblasDznrm2Fortran( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result); // nrm2Batched hipblasStatus_t hipblasSnrm2BatchedFortran(hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, float* results); hipblasStatus_t hipblasDnrm2BatchedFortran(hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, double* results); hipblasStatus_t hipblasScnrm2BatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, float* results); hipblasStatus_t hipblasDznrm2BatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, double* results); // nrm2StridedBatched hipblasStatus_t hipblasSnrm2StridedBatchedFortran(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, float* results); hipblasStatus_t hipblasDnrm2StridedBatchedFortran(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, double* results); hipblasStatus_t hipblasScnrm2StridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, float* results); hipblasStatus_t hipblasDznrm2StridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, double* results); // amax hipblasStatus_t hipblasIsamaxFortran(hipblasHandle_t handle, int n, const float* x, int incx, int* result); hipblasStatus_t hipblasIdamaxFortran(hipblasHandle_t handle, int n, const double* x, int incx, int* result); hipblasStatus_t hipblasIcamaxFortran( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result); hipblasStatus_t hipblasIzamaxFortran( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result); // amaxBatched hipblasStatus_t hipblasIsamaxBatchedFortran( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, int* result); hipblasStatus_t hipblasIdamaxBatchedFortran( hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, int* result); hipblasStatus_t hipblasIcamaxBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, int* result); hipblasStatus_t hipblasIzamaxBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, int* result); // amaxStridedBatched hipblasStatus_t hipblasIsamaxStridedBatchedFortran(hipblasHandle_t handle, int n, const float* x, int incx, 
hipblasStride stridex, int batch_count, int* result); hipblasStatus_t hipblasIdamaxStridedBatchedFortran(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, int* result); hipblasStatus_t hipblasIcamaxStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, int* result); hipblasStatus_t hipblasIzamaxStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, int* result); // amin hipblasStatus_t hipblasIsaminFortran(hipblasHandle_t handle, int n, const float* x, int incx, int* result); hipblasStatus_t hipblasIdaminFortran(hipblasHandle_t handle, int n, const double* x, int incx, int* result); hipblasStatus_t hipblasIcaminFortran( hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result); hipblasStatus_t hipblasIzaminFortran( hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result); // aminBatched hipblasStatus_t hipblasIsaminBatchedFortran( hipblasHandle_t handle, int n, const float* const x[], int incx, int batch_count, int* result); hipblasStatus_t hipblasIdaminBatchedFortran( hipblasHandle_t handle, int n, const double* const x[], int incx, int batch_count, int* result); hipblasStatus_t hipblasIcaminBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count, int* result); hipblasStatus_t hipblasIzaminBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx, int batch_count, int* result); // aminStridedBatched hipblasStatus_t hipblasIsaminStridedBatchedFortran(hipblasHandle_t handle, int n, const float* x, int incx, hipblasStride stridex, int batch_count, int* result); hipblasStatus_t hipblasIdaminStridedBatchedFortran(hipblasHandle_t handle, int n, const double* x, int incx, hipblasStride stridex, int batch_count, int* result); hipblasStatus_t hipblasIcaminStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex, int batch_count, int* result); hipblasStatus_t hipblasIzaminStridedBatchedFortran(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count, int* result); // rot hipblasStatus_t hipblasSrotFortran(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* c, const float* s); hipblasStatus_t hipblasDrotFortran(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* c, const double* s); hipblasStatus_t hipblasCrotFortran(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, const float* c, const hipblasComplex* s); hipblasStatus_t hipblasCsrotFortran(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, const float* c, const float* s); hipblasStatus_t hipblasZrotFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, const double* c, const hipblasDoubleComplex* s); hipblasStatus_t hipblasZdrotFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, const double* c, const double* s); // rotBatched hipblasStatus_t hipblasSrotBatchedFortran(hipblasHandle_t handle, int n, float* const x[], int incx, float* const y[], int incy, const float* c, const float* s, int batch_count); hipblasStatus_t hipblasDrotBatchedFortran(hipblasHandle_t 
handle, int n, double* const x[], int incx, double* const y[], int incy, const double* c, const double* s, int batch_count); hipblasStatus_t hipblasCrotBatchedFortran(hipblasHandle_t handle, int n, hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, const float* c, const hipblasComplex* s, int batch_count); hipblasStatus_t hipblasCsrotBatchedFortran(hipblasHandle_t handle, int n, hipblasComplex* const x[], int incx, hipblasComplex* const y[], int incy, const float* c, const float* s, int batch_count); hipblasStatus_t hipblasZrotBatchedFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, const double* c, const hipblasDoubleComplex* s, int batch_count); hipblasStatus_t hipblasZdrotBatchedFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const y[], int incy, const double* c, const double* s, int batch_count); // rotStridedBatched hipblasStatus_t hipblasSrotStridedBatchedFortran(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stride_x, float* y, int incy, hipblasStride stride_y, const float* c, const float* s, int batch_count); hipblasStatus_t hipblasDrotStridedBatchedFortran(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stride_x, double* y, int incy, hipblasStride stride_y, const double* c, const double* s, int batch_count); hipblasStatus_t hipblasCrotStridedBatchedFortran(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stride_x, hipblasComplex* y, int incy, hipblasStride stride_y, const float* c, const hipblasComplex* s, int batch_count); hipblasStatus_t hipblasCsrotStridedBatchedFortran(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stride_x, hipblasComplex* y, int incy, hipblasStride stride_y, const float* c, const float* s, int batch_count); hipblasStatus_t hipblasZrotStridedBatchedFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, const double* c, const hipblasDoubleComplex* s, int batch_count); hipblasStatus_t hipblasZdrotStridedBatchedFortran(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, const double* c, const double* s, int batch_count); // rotg hipblasStatus_t hipblasSrotgFortran(hipblasHandle_t handle, float* a, float* b, float* c, float* s); hipblasStatus_t hipblasDrotgFortran(hipblasHandle_t handle, double* a, double* b, double* c, double* s); hipblasStatus_t hipblasCrotgFortran( hipblasHandle_t handle, hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s); hipblasStatus_t hipblasZrotgFortran(hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s); // rotgBatched hipblasStatus_t hipblasSrotgBatchedFortran(hipblasHandle_t handle, float* const a[], float* const b[], float* const c[], float* const s[], int batch_count); hipblasStatus_t hipblasDrotgBatchedFortran(hipblasHandle_t handle, double* const a[], double* const b[], double* const c[], double* const s[], int batch_count); hipblasStatus_t hipblasCrotgBatchedFortran(hipblasHandle_t handle, hipblasComplex* const a[], hipblasComplex* const b[], float* const c[], hipblasComplex* const s[], int batch_count); hipblasStatus_t hipblasZrotgBatchedFortran(hipblasHandle_t handle, hipblasDoubleComplex* const a[], hipblasDoubleComplex* 
const b[], double* const c[], hipblasDoubleComplex* const s[], int batch_count); // rotgStridedBatched hipblasStatus_t hipblasSrotgStridedBatchedFortran(hipblasHandle_t handle, float* a, hipblasStride stride_a, float* b, hipblasStride stride_b, float* c, hipblasStride stride_c, float* s, hipblasStride stride_s, int batch_count); hipblasStatus_t hipblasDrotgStridedBatchedFortran(hipblasHandle_t handle, double* a, hipblasStride stride_a, double* b, hipblasStride stride_b, double* c, hipblasStride stride_c, double* s, hipblasStride stride_s, int batch_count); hipblasStatus_t hipblasCrotgStridedBatchedFortran(hipblasHandle_t handle, hipblasComplex* a, hipblasStride stride_a, hipblasComplex* b, hipblasStride stride_b, float* c, hipblasStride stride_c, hipblasComplex* s, hipblasStride stride_s, int batch_count); hipblasStatus_t hipblasZrotgStridedBatchedFortran(hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasStride stride_a, hipblasDoubleComplex* b, hipblasStride stride_b, double* c, hipblasStride stride_c, hipblasDoubleComplex* s, hipblasStride stride_s, int batch_count); // rotm hipblasStatus_t hipblasSrotmFortran( hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param); hipblasStatus_t hipblasDrotmFortran( hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param); // rotmBatched hipblasStatus_t hipblasSrotmBatchedFortran(hipblasHandle_t handle, int n, float* const x[], int incx, float* const y[], int incy, const float* const param[], int batch_count); hipblasStatus_t hipblasDrotmBatchedFortran(hipblasHandle_t handle, int n, double* const x[], int incx, double* const y[], int incy, const double* const param[], int batch_count); // rotmStridedBatched hipblasStatus_t hipblasSrotmStridedBatchedFortran(hipblasHandle_t handle, int n, float* x, int incx, hipblasStride stride_x, float* y, int incy, hipblasStride stride_y, const float* param, hipblasStride stride_param, int batch_count); hipblasStatus_t hipblasDrotmStridedBatchedFortran(hipblasHandle_t handle, int n, double* x, int incx, hipblasStride stride_x, double* y, int incy, hipblasStride stride_y, const double* param, hipblasStride stride_param, int batch_count); // rotmg hipblasStatus_t hipblasSrotmgFortran( hipblasHandle_t handle, float* d1, float* d2, float* x1, const float* y1, float* param); hipblasStatus_t hipblasDrotmgFortran( hipblasHandle_t handle, double* d1, double* d2, double* x1, const double* y1, double* param); // rotmgBatched hipblasStatus_t hipblasSrotmgBatchedFortran(hipblasHandle_t handle, float* const d1[], float* const d2[], float* const x1[], const float* const y1[], float* const param[], int batch_count); hipblasStatus_t hipblasDrotmgBatchedFortran(hipblasHandle_t handle, double* const d1[], double* const d2[], double* const x1[], const double* const y1[], double* const param[], int batch_count); // rotmgStridedBatched hipblasStatus_t hipblasSrotmgStridedBatchedFortran(hipblasHandle_t handle, float* d1, hipblasStride stride_d1, float* d2, hipblasStride stride_d2, float* x1, hipblasStride stride_x1, const float* y1, hipblasStride stride_y1, float* param, hipblasStride stride_param, int batch_count); hipblasStatus_t hipblasDrotmgStridedBatchedFortran(hipblasHandle_t handle, double* d1, hipblasStride stride_d1, double* d2, hipblasStride stride_d2, double* x1, hipblasStride stride_x1, const double* y1, hipblasStride stride_y1, double* param, hipblasStride stride_param, int batch_count); /* ========== * L2 * ========== */ // ger hipblasStatus_t 
hipblasSgerFortran(hipblasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); hipblasStatus_t hipblasDgerFortran(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); hipblasStatus_t hipblasCgeruFortran(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda); hipblasStatus_t hipblasCgercFortran(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda); hipblasStatus_t hipblasZgeruFortran(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda); hipblasStatus_t hipblasZgercFortran(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda); // ger_batched hipblasStatus_t hipblasSgerBatchedFortran(hipblasHandle_t handle, int m, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const A[], int lda, int batch_count); hipblasStatus_t hipblasDgerBatchedFortran(hipblasHandle_t handle, int m, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const A[], int lda, int batch_count); hipblasStatus_t hipblasCgeruBatchedFortran(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batch_count); hipblasStatus_t hipblasCgercBatchedFortran(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batch_count); hipblasStatus_t hipblasZgeruBatchedFortran(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batch_count); hipblasStatus_t hipblasZgercBatchedFortran(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batch_count); // ger_strided_batched hipblasStatus_t hipblasSgerStridedBatchedFortran(hipblasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* A, int lda, hipblasStride strideA, int batch_count); hipblasStatus_t hipblasDgerStridedBatchedFortran(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* A, int lda, hipblasStride strideA, int batch_count); hipblasStatus_t hipblasCgeruStridedBatchedFortran(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count); hipblasStatus_t 
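/* Editor's note: illustrative addition, not part of the original header. For the level-2
 * wrappers the matrix arguments follow the usual column-major BLAS convention: for
 * hipblasSgerFortran, A is m-by-n with leading dimension lda >= m, element A(i,j) stored at
 * A[i + j * lda], and the update is A = alpha * x * y^T + A (x of length m, y of length n).
 * Minimal sketch, assuming an already-created handle, host pointer mode, and no error
 * checking:
 *
 *     int m = 128, n = 64, lda = m;
 *     float *dx, *dy, *dA;
 *     hipMalloc((void**)&dx, m * sizeof(float));
 *     hipMalloc((void**)&dy, n * sizeof(float));
 *     hipMalloc((void**)&dA, (size_t)lda * n * sizeof(float));
 *     // ... initialize dx, dy and dA on the device ...
 *     const float alpha = 1.0f;
 *     hipblasSgerFortran(handle, m, n, &alpha, dx, 1, dy, 1, dA, lda);
 */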
hipblasCgercStridedBatchedFortran(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count); hipblasStatus_t hipblasZgeruStridedBatchedFortran(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count); hipblasStatus_t hipblasZgercStridedBatchedFortran(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count); // hbmv hipblasStatus_t hipblasChbmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); hipblasStatus_t hipblasZhbmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); // hbmv_batched hipblasStatus_t hipblasChbmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batchCount); hipblasStatus_t hipblasZhbmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount); // hbmv_strided_batched hipblasStatus_t hipblasChbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount); hipblasStatus_t hipblasZhbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount); // hemv hipblasStatus_t hipblasChemvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); hipblasStatus_t hipblasZhemvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); // hemv_batched hipblasStatus_t hipblasChemvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const 
hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count); hipblasStatus_t hipblasZhemvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count); // hemv_strided_batched hipblasStatus_t hipblasChemvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride stride_a, const hipblasComplex* x, int incx, hipblasStride stride_x, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stride_y, int batch_count); hipblasStatus_t hipblasZhemvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, int batch_count); // her hipblasStatus_t hipblasCherFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasComplex* A, int lda); hipblasStatus_t hipblasZherFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda); // her_batched hipblasStatus_t hipblasCherBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const A[], int lda, int batchCount); hipblasStatus_t hipblasZherBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const A[], int lda, int batchCount); // her_strided_batched hipblasStatus_t hipblasCherStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount); hipblasStatus_t hipblasZherStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount); // her2 hipblasStatus_t hipblasCher2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda); hipblasStatus_t hipblasZher2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda); // her2_batched hipblasStatus_t hipblasCher2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batchCount); hipblasStatus_t hipblasZher2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], 
int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batchCount); // her2_strided_batched hipblasStatus_t hipblasCher2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount); hipblasStatus_t hipblasZher2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount); // hpmv hipblasStatus_t hipblasChpmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); hipblasStatus_t hipblasZhpmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); // hpmv_batched hipblasStatus_t hipblasChpmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const AP[], const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batchCount); hipblasStatus_t hipblasZhpmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount); // hpmv_strided_batched hipblasStatus_t hipblasChpmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, hipblasStride strideAP, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount); hipblasStatus_t hipblasZhpmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, hipblasStride strideAP, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount); // hpr hipblasStatus_t hipblasChprFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP); hipblasStatus_t hipblasZhprFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP); // hpr_batched hipblasStatus_t hipblasChprBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const AP[], int batchCount); hipblasStatus_t hipblasZhprBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const AP[], int batchCount); // hpr_strided_batched hipblasStatus_t hipblasChprStridedBatchedFortran(hipblasHandle_t handle, 
hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* AP, hipblasStride strideAP, int batchCount); hipblasStatus_t hipblasZhprStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount); // hpr2 hipblasStatus_t hipblasChpr2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* AP); hipblasStatus_t hipblasZhpr2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP); // hpr2_batched hipblasStatus_t hipblasChpr2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const AP[], int batchCount); hipblasStatus_t hipblasZhpr2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const AP[], int batchCount); // hpr2_strided_batched hipblasStatus_t hipblasChpr2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* AP, hipblasStride strideAP, int batchCount); hipblasStatus_t hipblasZhpr2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount); // sbmv hipblasStatus_t hipblasSsbmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); hipblasStatus_t hipblasDsbmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); // sbmv_batched hipblasStatus_t hipblasSsbmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount); hipblasStatus_t hipblasDsbmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* y[], int incy, int batchCount); // sbmv_strided_batched hipblasStatus_t hipblasSsbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount); hipblasStatus_t hipblasDsbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, 
hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount); // spmv hipblasStatus_t hipblasSspmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* AP, const float* x, int incx, const float* beta, float* y, int incy); hipblasStatus_t hipblasDspmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* AP, const double* x, int incx, const double* beta, double* y, int incy); // spmv_batched hipblasStatus_t hipblasSspmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const AP[], const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount); hipblasStatus_t hipblasDspmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const AP[], const double* const x[], int incx, const double* beta, double* y[], int incy, int batchCount); // spmv_strided_batched hipblasStatus_t hipblasSspmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* AP, hipblasStride strideAP, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount); hipblasStatus_t hipblasDspmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* AP, hipblasStride strideAP, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount); // spr hipblasStatus_t hipblasSsprFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* AP); hipblasStatus_t hipblasDsprFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* AP); hipblasStatus_t hipblasCsprFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP); hipblasStatus_t hipblasZsprFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP); // spr_batched hipblasStatus_t hipblasSsprBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, float* const AP[], int batchCount); hipblasStatus_t hipblasDsprBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, double* const AP[], int batchCount); hipblasStatus_t hipblasCsprBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const AP[], int batchCount); hipblasStatus_t hipblasZsprBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const AP[], int batchCount); // spr_strided_batched hipblasStatus_t hipblasSsprStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, float* AP, hipblasStride strideAP, int batchCount); hipblasStatus_t hipblasDsprStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const 
double* x, int incx, hipblasStride stridex, double* AP, hipblasStride strideAP, int batchCount); hipblasStatus_t hipblasCsprStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* AP, hipblasStride strideAP, int batchCount); hipblasStatus_t hipblasZsprStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount); // spr2 hipblasStatus_t hipblasSspr2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* AP); hipblasStatus_t hipblasDspr2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP); // spr2_batched hipblasStatus_t hipblasSspr2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const AP[], int batchCount); hipblasStatus_t hipblasDspr2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const AP[], int batchCount); // spr2_strided_batched hipblasStatus_t hipblasSspr2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* AP, hipblasStride strideAP, int batchCount); hipblasStatus_t hipblasDspr2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* AP, hipblasStride strideAP, int batchCount); // symv hipblasStatus_t hipblasSsymvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); hipblasStatus_t hipblasDsymvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); hipblasStatus_t hipblasCsymvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); hipblasStatus_t hipblasZsymvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); // symv_batched hipblasStatus_t hipblasSsymvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* y[], int incy, int batchCount); hipblasStatus_t hipblasDsymvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* y[], int incy, int batchCount); hipblasStatus_t hipblasCsymvBatchedFortran(hipblasHandle_t handle, 
hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* y[], int incy, int batchCount); hipblasStatus_t hipblasZsymvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y[], int incy, int batchCount); // symv_strided_batched hipblasStatus_t hipblasSsymvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount); hipblasStatus_t hipblasDsymvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount); hipblasStatus_t hipblasCsymvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount); hipblasStatus_t hipblasZsymvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount); // syr hipblasStatus_t hipblasSsyrFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* A, int lda); hipblasStatus_t hipblasDsyrFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* A, int lda); hipblasStatus_t hipblasCsyrFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* A, int lda); hipblasStatus_t hipblasZsyrFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda); // syr_batched hipblasStatus_t hipblasSsyrBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, float* const A[], int lda, int batch_count); hipblasStatus_t hipblasDsyrBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, double* const A[], int lda, int batch_count); hipblasStatus_t hipblasCsyrBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const A[], int lda, int batch_count); hipblasStatus_t hipblasZsyrBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const A[], int lda, int batch_count); // syr_strided_batched hipblasStatus_t 
hipblasSsyrStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, float* A, int lda, hipblasStride strideA, int batch_count); hipblasStatus_t hipblasDsyrStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, double* A, int lda, hipblasStride strideA, int batch_count); hipblasStatus_t hipblasCsyrStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count); hipblasStatus_t hipblasZsyrStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count); // syr2 hipblasStatus_t hipblasSsyr2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); hipblasStatus_t hipblasDsyr2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); hipblasStatus_t hipblasCsyr2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda); hipblasStatus_t hipblasZsyr2Fortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda); // syr2_batched hipblasStatus_t hipblasSsyr2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* const x[], int incx, const float* const y[], int incy, float* const A[], int lda, int batchCount); hipblasStatus_t hipblasDsyr2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* const x[], int incx, const double* const y[], int incy, double* const A[], int lda, int batchCount); hipblasStatus_t hipblasCsyr2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batchCount); hipblasStatus_t hipblasZsyr2BatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batchCount); // syr2_strided_batched hipblasStatus_t hipblasSsyr2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, hipblasStride stridex, const float* y, int incy, hipblasStride stridey, float* A, int lda, hipblasStride strideA, int batchCount); hipblasStatus_t hipblasDsyr2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, hipblasStride stridex, const double* y, int incy, hipblasStride stridey, double* A, int lda, hipblasStride strideA, int batchCount); hipblasStatus_t hipblasCsyr2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t 
uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount); hipblasStatus_t hipblasZsyr2StridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount); // tbmv hipblasStatus_t hipblasStbmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, float* x, int incx); hipblasStatus_t hipblasDtbmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, double* x, int incx); hipblasStatus_t hipblasCtbmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasComplex* x, int incx); hipblasStatus_t hipblasZtbmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx); // tbmv_batched hipblasStatus_t hipblasStbmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* const A[], int lda, float* const x[], int incx, int batch_count); hipblasStatus_t hipblasDtbmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* const A[], int lda, double* const x[], int incx, int batch_count); hipblasStatus_t hipblasCtbmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count); hipblasStatus_t hipblasZtbmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count); // tbmv_strided_batched hipblasStatus_t hipblasStbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, hipblasStride stride_a, float* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasDtbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, hipblasStride stride_a, double* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasCtbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasStride stride_a, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasZtbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, hipblasDoubleComplex* x, int incx, hipblasStride 
stride_x, int batch_count); // tbsv hipblasStatus_t hipblasStbsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, float* x, int incx); hipblasStatus_t hipblasDtbsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, double* x, int incx); hipblasStatus_t hipblasCtbsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasComplex* x, int incx); hipblasStatus_t hipblasZtbsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx); // tbsv_batched hipblasStatus_t hipblasStbsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* const A[], int lda, float* const x[], int incx, int batchCount); hipblasStatus_t hipblasDtbsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* const A[], int lda, double* const x[], int incx, int batchCount); hipblasStatus_t hipblasCtbsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batchCount); hipblasStatus_t hipblasZtbsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batchCount); // tbsv_strided_batched hipblasStatus_t hipblasStbsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* A, int lda, hipblasStride strideA, float* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasDtbsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* A, int lda, hipblasStride strideA, double* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasCtbsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasZtbsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount); // tpmv hipblasStatus_t hipblasStpmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, float* x, int incx); hipblasStatus_t hipblasDtpmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, double* x, int incx); hipblasStatus_t hipblasCtpmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const 
hipblasComplex* AP, hipblasComplex* x, int incx); hipblasStatus_t hipblasZtpmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx); // tpmv_batched hipblasStatus_t hipblasStpmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const AP[], float* const x[], int incx, int batchCount); hipblasStatus_t hipblasDtpmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const AP[], double* const x[], int incx, int batchCount); hipblasStatus_t hipblasCtpmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const AP[], hipblasComplex* const x[], int incx, int batchCount); hipblasStatus_t hipblasZtpmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const AP[], hipblasDoubleComplex* const x[], int incx, int batchCount); // tpmv_strided_batched hipblasStatus_t hipblasStpmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, hipblasStride strideAP, float* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasDtpmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, hipblasStride strideAP, double* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasCtpmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasStride strideAP, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasZtpmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasStride strideAP, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount); // tpsv hipblasStatus_t hipblasStpsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, float* x, int incx); hipblasStatus_t hipblasDtpsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, double* x, int incx); hipblasStatus_t hipblasCtpsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasComplex* x, int incx); hipblasStatus_t hipblasZtpsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx); // tpsv_batched hipblasStatus_t hipblasStpsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const AP[], float* const x[], int incx, int batchCount); hipblasStatus_t hipblasDtpsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const AP[], double* const x[], int incx, int batchCount); hipblasStatus_t
hipblasCtpsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const AP[], hipblasComplex* const x[], int incx, int batchCount); hipblasStatus_t hipblasZtpsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const AP[], hipblasDoubleComplex* const x[], int incx, int batchCount); // tpsv_strided_batched hipblasStatus_t hipblasStpsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, hipblasStride strideAP, float* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasDtpsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, hipblasStride strideAP, double* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasCtpsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasStride strideAP, hipblasComplex* x, int incx, hipblasStride stridex, int batchCount); hipblasStatus_t hipblasZtpsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasStride strideAP, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batchCount); // trmv hipblasStatus_t hipblasStrmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, float* x, int incx); hipblasStatus_t hipblasDtrmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, double* x, int incx); hipblasStatus_t hipblasCtrmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx); hipblasStatus_t hipblasZtrmvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx); // trmv_batched hipblasStatus_t hipblasStrmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const A[], int lda, float* const x[], int incx, int batch_count); hipblasStatus_t hipblasDtrmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const A[], int lda, double* const x[], int incx, int batch_count); hipblasStatus_t hipblasCtrmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count); hipblasStatus_t hipblasZtrmvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count); // trmv_strided_batched hipblasStatus_t hipblasStrmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* 
A, int lda, hipblasStride stride_a, float* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasDtrmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, hipblasStride stride_a, double* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasCtrmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasStride stride_a, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count); hipblasStatus_t hipblasZtrmvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, int batch_count); // trsv hipblasStatus_t hipblasStrsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, float* x, int incx); hipblasStatus_t hipblasDtrsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, double* x, int incx); hipblasStatus_t hipblasCtrsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx); hipblasStatus_t hipblasZtrsvFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx); // trsv_batched hipblasStatus_t hipblasStrsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* const A[], int lda, float* const x[], int incx, int batch_count); hipblasStatus_t hipblasDtrsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* const A[], int lda, double* const x[], int incx, int batch_count); hipblasStatus_t hipblasCtrsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count); hipblasStatus_t hipblasZtrsvBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count); // trsv_strided_batched hipblasStatus_t hipblasStrsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* A, int lda, hipblasStride strideA, float* x, int incx, hipblasStride stridex, int batch_count); hipblasStatus_t hipblasDtrsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* A, int lda, hipblasStride strideA, double* x, int incx, hipblasStride stridex, int batch_count); hipblasStatus_t hipblasCtrsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* x, int incx, hipblasStride 
stridex, int batch_count); hipblasStatus_t hipblasZtrsvStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* x, int incx, hipblasStride stridex, int batch_count); // gbmv hipblasStatus_t hipblasSgbmvFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); hipblasStatus_t hipblasDgbmvFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); hipblasStatus_t hipblasCgbmvFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); hipblasStatus_t hipblasZgbmvFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); // gbmv_batched hipblasStatus_t hipblasSgbmvBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* const y[], int incy, int batch_count); hipblasStatus_t hipblasDgbmvBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* const y[], int incy, int batch_count); hipblasStatus_t hipblasCgbmvBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count); hipblasStatus_t hipblasZgbmvBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count); // gbmv_strided_batched hipblasStatus_t hipblasSgbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, hipblasStride stride_a, const float* x, int incx, hipblasStride stride_x, const float* beta, float* y, int incy, hipblasStride stride_y, int batch_count); hipblasStatus_t hipblasDgbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, hipblasStride stride_a, const double* x, int incx, hipblasStride stride_x, const double* beta, double* y, int incy, hipblasStride stride_y, int batch_count); hipblasStatus_t hipblasCgbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride stride_a, const hipblasComplex* x, int 
incx, hipblasStride stride_x, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stride_y, int batch_count); hipblasStatus_t hipblasZgbmvStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, int batch_count); // gemv hipblasStatus_t hipblasSgemvFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); hipblasStatus_t hipblasDgemvFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); hipblasStatus_t hipblasCgemvFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); hipblasStatus_t hipblasZgemvFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); // gemv_batched hipblasStatus_t hipblasSgemvBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* const A[], int lda, const float* const x[], int incx, const float* beta, float* const y[], int incy, int batch_count); hipblasStatus_t hipblasDgemvBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* const A[], int lda, const double* const x[], int incx, const double* beta, double* const y[], int incy, int batch_count); hipblasStatus_t hipblasCgemvBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count); hipblasStatus_t hipblasZgemvBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count); // gemv_strided_batched hipblasStatus_t hipblasSgemvStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasDgemvStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasCgemvStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, 
const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batch_count); hipblasStatus_t hipblasZgemvStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batch_count); /* ========== * L3 * ========== */ // herk hipblasStatus_t hipblasCherkFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* A, int lda, const float* beta, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZherkFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* A, int lda, const double* beta, hipblasDoubleComplex* C, int ldc); // herk_batched hipblasStatus_t hipblasCherkBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* const A[], int lda, const float* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZherkBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* const A[], int lda, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // herk_strided_batched hipblasStatus_t hipblasCherkStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasZherkStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount); // herkx hipblasStatus_t hipblasCherkxFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const float* beta, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZherkxFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const double* beta, hipblasDoubleComplex* C, int ldc); // herkx_batched hipblasStatus_t hipblasCherkxBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const float* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZherkxBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // herkx_strided_batched hipblasStatus_t 
hipblasCherkxStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasZherkxStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount); // her2k hipblasStatus_t hipblasCher2kFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const float* beta, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZher2kFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const double* beta, hipblasDoubleComplex* C, int ldc); // her2k_batched hipblasStatus_t hipblasCher2kBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const float* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZher2kBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // her2k_strided_batched hipblasStatus_t hipblasCher2kStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasZher2kStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const double* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount); // symm hipblasStatus_t hipblasSsymmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); hipblasStatus_t hipblasDsymmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); hipblasStatus_t hipblasCsymmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc); 
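/*
 * Editorial note: the *Fortran entry points declared in this header mirror the regular
 * hipBLAS C API argument-for-argument, which suggests the C++ test clients can call them
 * exactly like their C counterparts. Below is a minimal, hedged usage sketch for
 * hipblasSsymmFortran (C = alpha*A*B + beta*C with symmetric A applied from the left).
 * It assumes the program links against the Fortran wrapper module that implements these
 * symbols, and that the device buffers d_A, d_B, d_C (illustrative names only) are filled
 * elsewhere; error checking is omitted for brevity.
 *
 *   #include <hip/hip_runtime.h>
 *   #include <hipblas.h>
 *
 *   hipblasHandle_t handle;
 *   hipblasCreate(&handle);
 *
 *   int   m = 4, n = 4, lda = m, ldb = m, ldc = m;
 *   float alpha = 1.0f, beta = 0.0f;
 *   float *d_A, *d_B, *d_C; // device buffers, assumed initialized elsewhere
 *   hipMalloc(&d_A, size_t(lda) * m * sizeof(float)); // A is m x m (side = left)
 *   hipMalloc(&d_B, size_t(ldb) * n * sizeof(float)); // B is m x n
 *   hipMalloc(&d_C, size_t(ldc) * n * sizeof(float)); // C is m x n
 *
 *   // Same argument list as hipblasSsymm; alpha and beta are host pointers under the
 *   // default HIPBLAS_POINTER_MODE_HOST.
 *   hipblasStatus_t status = hipblasSsymmFortran(handle,
 *                                                HIPBLAS_SIDE_LEFT,
 *                                                HIPBLAS_FILL_MODE_UPPER,
 *                                                m, n,
 *                                                &alpha, d_A, lda,
 *                                                d_B, ldb,
 *                                                &beta, d_C, ldc);
 *
 *   hipblasDestroy(handle);
 */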
hipblasStatus_t hipblasZsymmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc); // symm_batched hipblasStatus_t hipblasSsymmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount); hipblasStatus_t hipblasDsymmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount); hipblasStatus_t hipblasCsymmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZsymmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // symm_strided_batched hipblasStatus_t hipblasSsymmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasDsymmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasCsymmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasZsymmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount); // syrk hipblasStatus_t hipblasSsyrkFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); hipblasStatus_t hipblasDsyrkFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* beta, double* C, int ldc); hipblasStatus_t hipblasCsyrkFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, 
hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* beta, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZsyrkFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc); // syrk_batched hipblasStatus_t hipblasSsyrkBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* beta, float* const C[], int ldc, int batchCount); hipblasStatus_t hipblasDsyrkBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* beta, double* const C[], int ldc, int batchCount); hipblasStatus_t hipblasCsyrkBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZsyrkBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // syrk_strided_batched hipblasStatus_t hipblasSsyrkStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasDsyrkStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasCsyrkStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasZsyrkStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount); // syr2k hipblasStatus_t hipblasSsyr2kFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); hipblasStatus_t hipblasDsyr2kFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); hipblasStatus_t hipblasCsyr2kFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* 
C, int ldc); hipblasStatus_t hipblasZsyr2kFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc); // syr2k_batched hipblasStatus_t hipblasSsyr2kBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount); hipblasStatus_t hipblasDsyr2kBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount); hipblasStatus_t hipblasCsyr2kBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZsyr2kBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // syr2k_strided_batched hipblasStatus_t hipblasSsyr2kStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasDsyr2kStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasCsyr2kStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasZsyr2kStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount); // syrkx hipblasStatus_t hipblasSsyrkxFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); hipblasStatus_t hipblasDsyrkxFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, 
int ldc); hipblasStatus_t hipblasCsyrkxFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZsyrkxFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc); // syrkx_batched hipblasStatus_t hipblasSsyrkxBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount); hipblasStatus_t hipblasDsyrkxBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount); hipblasStatus_t hipblasCsyrkxBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZsyrkxBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // syrkx_strided_batched hipblasStatus_t hipblasSsyrkxStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* B, int ldb, hipblasStride strideB, const float* beta, float* C, int ldc, hipblasStride stridec, int batchCount); hipblasStatus_t hipblasDsyrkxStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* B, int ldb, hipblasStride strideB, const double* beta, double* C, int ldc, hipblasStride stridec, int batchCount); hipblasStatus_t hipblasCsyrkxStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride stridec, int batchCount); hipblasStatus_t hipblasZsyrkxStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride stridec, int batchCount); // geam hipblasStatus_t hipblasSgeamFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const float* alpha, const float* A, int lda, const float* 
beta, const float* B, int ldb, float* C, int ldc); hipblasStatus_t hipblasDgeamFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc); hipblasStatus_t hipblasCgeamFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* beta, const hipblasComplex* B, int ldb, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZgeamFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* C, int ldc); // geam_batched hipblasStatus_t hipblasSgeamBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const float* alpha, const float* const A[], int lda, const float* beta, const float* const B[], int ldb, float* const C[], int ldc, int batchCount); hipblasStatus_t hipblasDgeamBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const double* alpha, const double* const A[], int lda, const double* beta, const double* const B[], int ldb, double* const C[], int ldc, int batchCount); hipblasStatus_t hipblasCgeamBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* beta, const hipblasComplex* const B[], int ldb, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZgeamBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* const B[], int ldb, hipblasDoubleComplex* const C[], int ldc, int batchCount); // geam_strided_batched hipblasStatus_t hipblasSgeamStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, const float* beta, const float* B, int ldb, hipblasStride strideB, float* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasDgeamStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, const double* beta, const double* B, int ldb, hipblasStride strideB, double* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasCgeamStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* beta, const hipblasComplex* B, int ldb, hipblasStride strideB, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasZgeamStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int 
ldb, hipblasStride strideB, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount); // hemm hipblasStatus_t hipblasChemmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZhemmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc); // hemm_batched hipblasStatus_t hipblasChemmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZhemmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // hemm_strided_batched hipblasStatus_t hipblasChemmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount); hipblasStatus_t hipblasZhemmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount); // trmm hipblasStatus_t hipblasStrmmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, float* B, int ldb); hipblasStatus_t hipblasDtrmmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, double* B, int ldb); hipblasStatus_t hipblasCtrmmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasComplex* B, int ldb); hipblasStatus_t hipblasZtrmmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb); // trmm_batched hipblasStatus_t hipblasStrmmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); hipblasStatus_t 
hipblasDtrmmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); hipblasStatus_t hipblasCtrmmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, hipblasComplex* const B[], int ldb, int batchCount); hipblasStatus_t hipblasZtrmmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const B[], int ldb, int batchCount); // trmm_strided_batched hipblasStatus_t hipblasStrmmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, hipblasStride strideA, float* B, int ldb, hipblasStride strideB, int batchCount); hipblasStatus_t hipblasDtrmmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, hipblasStride strideA, double* B, int ldb, hipblasStride strideB, int batchCount); hipblasStatus_t hipblasCtrmmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* B, int ldb, hipblasStride strideB, int batchCount); hipblasStatus_t hipblasZtrmmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* B, int ldb, hipblasStride strideB, int batchCount); // trtri hipblasStatus_t hipblasStrtriFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* A, int lda, float* invA, int ldinvA); hipblasStatus_t hipblasDtrtriFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* A, int lda, double* invA, int ldinvA); hipblasStatus_t hipblasCtrtriFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* A, int lda, hipblasComplex* invA, int ldinvA); hipblasStatus_t hipblasZtrtriFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* invA, int ldinvA); // trtri_batched hipblasStatus_t hipblasStrtriBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* const A[], int lda, float* const invA[], int ldinvA, int batch_count); hipblasStatus_t hipblasDtrtriBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* const A[], int lda, double* const invA[], int ldinvA, int batch_count); hipblasStatus_t hipblasCtrtriBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* const A[], int lda, hipblasComplex* const 
invA[], int ldinvA, int batch_count); hipblasStatus_t hipblasZtrtriBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const invA[], int ldinvA, int batch_count); // trtri_strided_batched hipblasStatus_t hipblasStrtriStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, float* A, int lda, hipblasStride stride_A, float* invA, int ldinvA, hipblasStride stride_invA, int batch_count); hipblasStatus_t hipblasDtrtriStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, double* A, int lda, hipblasStride stride_A, double* invA, int ldinvA, hipblasStride stride_invA, int batch_count); hipblasStatus_t hipblasCtrtriStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasComplex* A, int lda, hipblasStride stride_A, hipblasComplex* invA, int ldinvA, hipblasStride stride_invA, int batch_count); hipblasStatus_t hipblasZtrtriStridedBatchedFortran(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, hipblasDoubleComplex* A, int lda, hipblasStride stride_A, hipblasDoubleComplex* invA, int ldinvA, hipblasStride stride_invA, int batch_count); // dgmm hipblasStatus_t hipblasSdgmmFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc); hipblasStatus_t hipblasDdgmmFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* A, int lda, const double* x, int incx, double* C, int ldc); hipblasStatus_t hipblasCdgmmFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZdgmmFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* C, int ldc); // dgmm_batched hipblasStatus_t hipblasSdgmmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* const A[], int lda, const float* const x[], int incx, float* const C[], int ldc, int batch_count); hipblasStatus_t hipblasDdgmmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* const A[], int lda, const double* const x[], int incx, double* const C[], int ldc, int batch_count); hipblasStatus_t hipblasCdgmmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, hipblasComplex* const C[], int ldc, int batch_count); hipblasStatus_t hipblasZdgmmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const C[], int ldc, int batch_count); // dgmm_strided_batched hipblasStatus_t hipblasSdgmmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* A, int lda, hipblasStride stride_A, const float* x, int incx, hipblasStride stride_x, float* C, int ldc, hipblasStride stride_C, int batch_count); hipblasStatus_t hipblasDdgmmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* A, int lda, hipblasStride stride_A, const double* x, int incx, hipblasStride stride_x, double* C, 
int ldc, hipblasStride stride_C, int batch_count); hipblasStatus_t hipblasCdgmmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* A, int lda, hipblasStride stride_A, const hipblasComplex* x, int incx, hipblasStride stride_x, hipblasComplex* C, int ldc, hipblasStride stride_C, int batch_count); hipblasStatus_t hipblasZdgmmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* A, int lda, hipblasStride stride_A, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, hipblasDoubleComplex* C, int ldc, hipblasStride stride_C, int batch_count); // trsm hipblasStatus_t hipblasStrsmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* A, int lda, float* B, int ldb); hipblasStatus_t hipblasDtrsmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* A, int lda, double* B, int ldb); hipblasStatus_t hipblasCtrsmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* A, int lda, hipblasComplex* B, int ldb); hipblasStatus_t hipblasZtrsmFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb); // trsm_batched hipblasStatus_t hipblasStrsmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const A[], int lda, float* B[], int ldb, int batch_count); hipblasStatus_t hipblasDtrsmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const A[], int lda, double* B[], int ldb, int batch_count); hipblasStatus_t hipblasCtrsmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const A[], int lda, hipblasComplex* B[], int ldb, int batch_count); hipblasStatus_t hipblasZtrsmBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* B[], int ldb, int batch_count); // trsm_strided_batched hipblasStatus_t hipblasStrsmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* A, int lda, hipblasStride strideA, float* B, int ldb, hipblasStride strideB, int batch_count); hipblasStatus_t hipblasDtrsmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* A, int lda, hipblasStride strideA, double* B, int ldb, hipblasStride strideB, int batch_count); hipblasStatus_t hipblasCtrsmStridedBatchedFortran(hipblasHandle_t handle, 
hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* B, int ldb, hipblasStride strideB, int batch_count); hipblasStatus_t hipblasZtrsmStridedBatchedFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* B, int ldb, hipblasStride strideB, int batch_count); // gemm hipblasStatus_t hipblasHgemmFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* A, int lda, const hipblasHalf* B, int ldb, const hipblasHalf* beta, hipblasHalf* C, int ldc); hipblasStatus_t hipblasSgemmFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); hipblasStatus_t hipblasDgemmFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); hipblasStatus_t hipblasCgemmFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc); hipblasStatus_t hipblasZgemmFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc); // gemm batched hipblasStatus_t hipblasHgemmBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* const A[], int lda, const hipblasHalf* const B[], int ldb, const hipblasHalf* beta, hipblasHalf* const C[], int ldc, int batchCount); hipblasStatus_t hipblasSgemmBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const float* alpha, const float* const A[], int lda, const float* const B[], int ldb, const float* beta, float* const C[], int ldc, int batchCount); hipblasStatus_t hipblasDgemmBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const double* alpha, const double* const A[], int lda, const double* const B[], int ldb, const double* beta, double* const C[], int ldc, int batchCount); hipblasStatus_t hipblasCgemmBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount); hipblasStatus_t hipblasZgemmBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const 
hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount); // gemm_strided_batched hipblasStatus_t hipblasHgemmStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasHalf* alpha, const hipblasHalf* A, int lda, long long bsa, const hipblasHalf* B, int ldb, long long bsb, const hipblasHalf* beta, hipblasHalf* C, int ldc, long long bsc, int batchCount); hipblasStatus_t hipblasSgemmStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, long long bsa, const float* B, int ldb, long long bsb, const float* beta, float* C, int ldc, long long bsc, int batchCount); hipblasStatus_t hipblasDgemmStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, long long bsa, const double* B, int ldb, long long bsb, const double* beta, double* C, int ldc, long long bsc, int batchCount); hipblasStatus_t hipblasCgemmStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, long long bsa, const hipblasComplex* B, int ldb, long long bsb, const hipblasComplex* beta, hipblasComplex* C, int ldc, long long bsc, int batchCount); hipblasStatus_t hipblasZgemmStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, long long bsa, const hipblasDoubleComplex* B, int ldb, long long bsb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, long long bsc, int batchCount); // gemmex hipblasStatus_t hipblasGemmExFortran(hipblasHandle_t handle, hipblasOperation_t trans_a, hipblasOperation_t trans_b, int m, int n, int k, const void* alpha, const void* a, hipblasDatatype_t a_type, int lda, const void* b, hipblasDatatype_t b_type, int ldb, const void* beta, void* c, hipblasDatatype_t c_type, int ldc, hipblasDatatype_t compute_type, hipblasGemmAlgo_t algo); hipblasStatus_t hipblasGemmBatchedExFortran(hipblasHandle_t handle, hipblasOperation_t trans_a, hipblasOperation_t trans_b, int m, int n, int k, const void* alpha, const void* a[], hipblasDatatype_t a_type, int lda, const void* b[], hipblasDatatype_t b_type, int ldb, const void* beta, void* c[], hipblasDatatype_t c_type, int ldc, int batch_count, hipblasDatatype_t compute_type, hipblasGemmAlgo_t algo); hipblasStatus_t hipblasGemmStridedBatchedExFortran(hipblasHandle_t handle, hipblasOperation_t trans_a, hipblasOperation_t trans_b, int m, int n, int k, const void* alpha, const void* a, hipblasDatatype_t a_type, int lda, hipblasStride stride_A, const void* b, hipblasDatatype_t b_type, int ldb, hipblasStride stride_B, const void* beta, void* c, hipblasDatatype_t c_type, int ldc, hipblasStride stride_C, int batch_count, hipblasDatatype_t compute_type, hipblasGemmAlgo_t algo); // trsm_ex hipblasStatus_t hipblasTrsmExFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const void* alpha, void* A, int lda, void* B, int ldb, const void* invA, int invA_size, hipblasDatatype_t compute_type); hipblasStatus_t hipblasTrsmBatchedExFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, 
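/* Illustrative sketch only (not part of the original header): a hypothetical call to the
 * hipblasGemmExFortran wrapper declared above, using float data throughout. The handle,
 * the device buffers dA, dB, dC, and the sizes m, n, k, lda, ldb, ldc are assumptions
 * supplied by the caller.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   hipblasStatus_t status = hipblasGemmExFortran(handle,
 *                                                 HIPBLAS_OP_N, HIPBLAS_OP_N,
 *                                                 m, n, k,
 *                                                 &alpha,
 *                                                 dA, HIPBLAS_R_32F, lda,
 *                                                 dB, HIPBLAS_R_32F, ldb,
 *                                                 &beta,
 *                                                 dC, HIPBLAS_R_32F, ldc,
 *                                                 HIPBLAS_R_32F,
 *                                                 HIPBLAS_GEMM_DEFAULT);
 */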
hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const void* alpha, void* A, int lda, void* B, int ldb, int batch_count, const void* invA, int invA_size, hipblasDatatype_t compute_type); hipblasStatus_t hipblasTrsmStridedBatchedExFortran(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const void* alpha, void* A, int lda, hipblasStride stride_A, void* B, int ldb, hipblasStride stride_B, int batch_count, const void* invA, int invA_size, hipblasStride stride_invA, hipblasDatatype_t compute_type); // // syrk_ex // hipblasStatus_t hipblasCsyrkExFortran(hipblasHandle_t handle, // hipblasFillMode_t uplo, // hipblasOperation_t trans, // int n, // int k, // const hipblasComplex* alpha, // const void* A, // hipblasDatatype_t Atype, // int lda, // const hipblasComplex* beta, // hipblasComplex* C, // hipblasDatatype_t Ctype, // int ldc); // // herk_ex // hipblasStatus_t hipblasCherkExFortran(hipblasHandle_t handle, // hipblasFillMode_t uplo, // hipblasOperation_t trans, // int n, // int k, // const float* alpha, // const void* A, // hipblasDatatype_t Atype, // int lda, // const float* beta, // hipblasComplex* C, // hipblasDatatype_t Ctype, // int ldc); // axpy_ex hipblasStatus_t hipblasAxpyExFortran(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, const void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, hipblasDatatype_t executionType); hipblasStatus_t hipblasAxpyBatchedExFortran(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, const void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, int batch_count, hipblasDatatype_t executionType); hipblasStatus_t hipblasAxpyStridedBatchedExFortran(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, const void* x, hipblasDatatype_t xType, int incx, hipblasStride stridex, void* y, hipblasDatatype_t yType, int incy, hipblasStride stridey, int batch_count, hipblasDatatype_t executionType); // dot_ex hipblasStatus_t hipblasDotExFortran(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, const void* y, hipblasDatatype_t yType, int incy, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); hipblasStatus_t hipblasDotcExFortran(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, const void* y, hipblasDatatype_t yType, int incy, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); hipblasStatus_t hipblasDotBatchedExFortran(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, const void* y, hipblasDatatype_t yType, int incy, int batch_count, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); hipblasStatus_t hipblasDotcBatchedExFortran(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, const void* y, hipblasDatatype_t yType, int incy, int batch_count, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); hipblasStatus_t hipblasDotStridedBatchedExFortran(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, hipblasStride stridex, const void* y, hipblasDatatype_t yType, int incy, hipblasStride stridey, int batch_count, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); hipblasStatus_t hipblasDotcStridedBatchedExFortran(hipblasHandle_t handle, int n, 
const void* x, hipblasDatatype_t xType, int incx, hipblasStride stridex, const void* y, hipblasDatatype_t yType, int incy, hipblasStride stridey, int batch_count, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); // nrm2_ex hipblasStatus_t hipblasNrm2ExFortran(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); hipblasStatus_t hipblasNrm2BatchedExFortran(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, int batch_count, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); hipblasStatus_t hipblasNrm2StridedBatchedExFortran(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, hipblasStride stridex, int batch_count, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); // rot_ex hipblasStatus_t hipblasRotExFortran(hipblasHandle_t handle, int n, void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, const void* c, const void* s, hipblasDatatype_t csType, hipblasDatatype_t executionType); hipblasStatus_t hipblasRotBatchedExFortran(hipblasHandle_t handle, int n, void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, const void* c, const void* s, hipblasDatatype_t csType, int batch_count, hipblasDatatype_t executionType); hipblasStatus_t hipblasRotStridedBatchedExFortran(hipblasHandle_t handle, int n, void* x, hipblasDatatype_t xType, int incx, hipblasStride stridex, void* y, hipblasDatatype_t yType, int incy, hipblasStride stridey, const void* c, const void* s, hipblasDatatype_t csType, int batch_count, hipblasDatatype_t executionType); // scal_ex hipblasStatus_t hipblasScalExFortran(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, void* x, hipblasDatatype_t xType, int incx, hipblasDatatype_t executionType); hipblasStatus_t hipblasScalBatchedExFortran(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, void* x, hipblasDatatype_t xType, int incx, int batch_count, hipblasDatatype_t executionType); hipblasStatus_t hipblasScalStridedBatchedExFortran(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, void* x, hipblasDatatype_t xType, int incx, hipblasStride stridex, int batch_count, hipblasDatatype_t executionType); /* ========== * Solver * ========== */ // getrf hipblasStatus_t hipblasSgetrfFortran( hipblasHandle_t handle, const int n, float* A, const int lda, int* ipiv, int* info); hipblasStatus_t hipblasDgetrfFortran( hipblasHandle_t handle, const int n, double* A, const int lda, int* ipiv, int* info); hipblasStatus_t hipblasCgetrfFortran( hipblasHandle_t handle, const int n, hipblasComplex* A, const int lda, int* ipiv, int* info); hipblasStatus_t hipblasZgetrfFortran(hipblasHandle_t handle, const int n, hipblasDoubleComplex* A, const int lda, int* ipiv, int* info); // getrf_batched hipblasStatus_t hipblasSgetrfBatchedFortran(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, int* info, const int batch_count); hipblasStatus_t hipblasDgetrfBatchedFortran(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, int* info, const int batch_count); hipblasStatus_t hipblasCgetrfBatchedFortran(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, int* info, const int batch_count); hipblasStatus_t 
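/* Illustrative sketch only (not part of the original header): a hypothetical call to the
 * hipblasScalExFortran wrapper declared above, scaling a float vector. The handle, the
 * device vector dx, and n/incx are assumed to have been prepared by the caller.
 *
 *   float alpha = 2.0f;
 *   hipblasStatus_t status = hipblasScalExFortran(handle, n,
 *                                                 &alpha, HIPBLAS_R_32F,
 *                                                 dx, HIPBLAS_R_32F, incx,
 *                                                 HIPBLAS_R_32F);
 */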
hipblasZgetrfBatchedFortran(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, int* info, const int batch_count); // getrf_strided_batched hipblasStatus_t hipblasSgetrfStridedBatchedFortran(hipblasHandle_t handle, const int n, float* A, const int lda, const hipblasStride stride_A, int* ipiv, const hipblasStride stride_P, int* info, const int batch_count); hipblasStatus_t hipblasDgetrfStridedBatchedFortran(hipblasHandle_t handle, const int n, double* A, const int lda, const hipblasStride stride_A, int* ipiv, const hipblasStride stride_P, int* info, const int batch_count); hipblasStatus_t hipblasCgetrfStridedBatchedFortran(hipblasHandle_t handle, const int n, hipblasComplex* A, const int lda, const hipblasStride stride_A, int* ipiv, const hipblasStride stride_P, int* info, const int batch_count); hipblasStatus_t hipblasZgetrfStridedBatchedFortran(hipblasHandle_t handle, const int n, hipblasDoubleComplex* A, const int lda, const hipblasStride stride_A, int* ipiv, const hipblasStride stride_P, int* info, const int batch_count); // getrs hipblasStatus_t hipblasSgetrsFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* A, const int lda, const int* ipiv, float* B, const int ldb, int* info); hipblasStatus_t hipblasDgetrsFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* A, const int lda, const int* ipiv, double* B, const int ldb, int* info); hipblasStatus_t hipblasCgetrsFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* A, const int lda, const int* ipiv, hipblasComplex* B, const int ldb, int* info); hipblasStatus_t hipblasZgetrsFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const int* ipiv, hipblasDoubleComplex* B, const int ldb, int* info); // getrs_batched hipblasStatus_t hipblasSgetrsBatchedFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batch_count); hipblasStatus_t hipblasDgetrsBatchedFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batch_count); hipblasStatus_t hipblasCgetrsBatchedFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batch_count); hipblasStatus_t hipblasZgetrsBatchedFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batch_count); // getrs_strided_batched hipblasStatus_t hipblasSgetrsStridedBatchedFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* A, const int lda, const hipblasStride stride_A, const int* ipiv, const hipblasStride stride_P, float* B, const int ldb, const hipblasStride stride_B, int* info, const int batch_count); hipblasStatus_t hipblasDgetrsStridedBatchedFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* A, const int lda, const hipblasStride stride_A, const 
int* ipiv, const hipblasStride stride_P, double* B, const int ldb, const hipblasStride stride_B, int* info, const int batch_count); hipblasStatus_t hipblasCgetrsStridedBatchedFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* A, const int lda, const hipblasStride stride_A, const int* ipiv, const hipblasStride stride_P, hipblasComplex* B, const int ldb, const hipblasStride stride_B, int* info, const int batch_count); hipblasStatus_t hipblasZgetrsStridedBatchedFortran(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const hipblasStride stride_A, const int* ipiv, const hipblasStride stride_P, hipblasDoubleComplex* B, const int ldb, const hipblasStride stride_B, int* info, const int batch_count); // getri_batched hipblasStatus_t hipblasSgetriBatchedFortran(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batch_count); hipblasStatus_t hipblasDgetriBatchedFortran(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batch_count); hipblasStatus_t hipblasCgetriBatchedFortran(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batch_count); hipblasStatus_t hipblasZgetriBatchedFortran(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batch_count); // geqrf hipblasStatus_t hipblasSgeqrfFortran(hipblasHandle_t handle, const int m, const int n, float* A, const int lda, float* tau, int* info); hipblasStatus_t hipblasDgeqrfFortran(hipblasHandle_t handle, const int m, const int n, double* A, const int lda, double* tau, int* info); hipblasStatus_t hipblasCgeqrfFortran(hipblasHandle_t handle, const int m, const int n, hipblasComplex* A, const int lda, hipblasComplex* tau, int* info); hipblasStatus_t hipblasZgeqrfFortran(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* A, const int lda, hipblasDoubleComplex* tau, int* info); // geqrf_batched hipblasStatus_t hipblasSgeqrfBatchedFortran(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const tau[], int* info, const int batch_count); hipblasStatus_t hipblasDgeqrfBatchedFortran(hipblasHandle_t handle, const int m, const int n, double* const A[], const int lda, double* const tau[], int* info, const int batch_count); hipblasStatus_t hipblasCgeqrfBatchedFortran(hipblasHandle_t handle, const int m, const int n, hipblasComplex* const A[], const int lda, hipblasComplex* const tau[], int* info, const int batch_count); hipblasStatus_t hipblasZgeqrfBatchedFortran(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const tau[], int* info, const int batch_count); // geqrf_strided_batched hipblasStatus_t hipblasSgeqrfStridedBatchedFortran(hipblasHandle_t handle, const int m, const int n, float* A, const int lda, const hipblasStride stride_A, float* tau, const hipblasStride stride_T, int* info, const int batch_count); hipblasStatus_t hipblasDgeqrfStridedBatchedFortran(hipblasHandle_t handle, const int m, const int n, double* A, const int lda, const hipblasStride stride_A, double* tau, const hipblasStride stride_T, int* info, const int 
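/* Illustrative sketch only (not part of the original header): a hypothetical call to the
 * hipblasSgetriBatchedFortran wrapper declared above, inverting a batch of LU-factorized
 * matrices. dA_array, dIpiv, dC_array and dInfo are assumed to be device buffers prepared
 * by the caller (for example after a prior hipblasSgetrfBatchedFortran call).
 *
 *   hipblasStatus_t status = hipblasSgetriBatchedFortran(handle, n,
 *                                                        dA_array, lda, dIpiv,
 *                                                        dC_array, ldc, dInfo,
 *                                                        batch_count);
 */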
batch_count); hipblasStatus_t hipblasCgeqrfStridedBatchedFortran(hipblasHandle_t handle, const int m, const int n, hipblasComplex* A, const int lda, const hipblasStride stride_A, hipblasComplex* tau, const hipblasStride stride_T, int* info, const int batch_count); hipblasStatus_t hipblasZgeqrfStridedBatchedFortran(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* A, const int lda, const hipblasStride stride_A, hipblasDoubleComplex* tau, const hipblasStride stride_T, int* info, const int batch_count); // gels hipblasStatus_t hipblasSgelsFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* A, const int lda, float* B, const int ldb, int* info, int* deviceInfo); hipblasStatus_t hipblasDgelsFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* A, const int lda, double* B, const int ldb, int* info, int* deviceInfo); hipblasStatus_t hipblasCgelsFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* A, const int lda, hipblasComplex* B, const int ldb, int* info, int* deviceInfo); hipblasStatus_t hipblasZgelsFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, hipblasDoubleComplex* B, const int ldb, int* info, int* deviceInfo); // gelsBatched hipblasStatus_t hipblasSgelsBatchedFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* const A[], const int lda, float* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount); hipblasStatus_t hipblasDgelsBatchedFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* const A[], const int lda, double* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount); hipblasStatus_t hipblasCgelsBatchedFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* const A[], const int lda, hipblasComplex* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount); hipblasStatus_t hipblasZgelsBatchedFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount); // gelsStridedBatched hipblasStatus_t hipblasSgelsStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float* A, const int lda, const hipblasStride strideA, float* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount); hipblasStatus_t hipblasDgelsStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double* A, const int lda, const hipblasStride strideA, double* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount); hipblasStatus_t hipblasCgelsStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* A, const int lda, const hipblasStride strideA, hipblasComplex* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount); hipblasStatus_t hipblasZgelsStridedBatchedFortran(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int 
n, const int nrhs, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, hipblasDoubleComplex* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount); } #endif hipBLAS-rocm-5.5.1/clients/include/hipblas_fortran_solver.f90000066400000000000000000001352001434647641600240750ustar00rootroot00000000000000!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Copyright (c) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. ! ! Permission is hereby granted, free of charge, to any person obtaining a copy ! of this software and associated documentation files (the "Software"), to deal ! in the Software without restriction, including without limitation the rights ! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ! copies of the Software, and to permit persons to whom the Software is ! furnished to do so, subject to the following conditions: ! ! The above copyright notice and this permission notice shall be included in ! all copies or substantial portions of the Software. ! ! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ! THE SOFTWARE. ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! module hipblas_interface use iso_c_binding use hipblas contains !--------! ! Solver ! !--------! ! getrf function hipblasSgetrfFortran(handle, n, A, lda, ipiv, info) & bind(c, name='hipblasSgetrfFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgetrfFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: info hipblasSgetrfFortran = & hipblasSgetrf(handle, n, A, lda, ipiv, info) end function hipblasSgetrfFortran function hipblasDgetrfFortran(handle, n, A, lda, ipiv, info) & bind(c, name='hipblasDgetrfFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgetrfFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: info hipblasDgetrfFortran = & hipblasDgetrf(handle, n, A, lda, ipiv, info) end function hipblasDgetrfFortran function hipblasCgetrfFortran(handle, n, A, lda, ipiv, info) & bind(c, name='hipblasCgetrfFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgetrfFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: info hipblasCgetrfFortran = & hipblasCgetrf(handle, n, A, lda, ipiv, info) end function hipblasCgetrfFortran function hipblasZgetrfFortran(handle, n, A, lda, ipiv, info) & bind(c, name='hipblasZgetrfFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgetrfFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv 
type(c_ptr), value :: info hipblasZgetrfFortran = & hipblasZgetrf(handle, n, A, lda, ipiv, info) end function hipblasZgetrfFortran ! getrf_batched function hipblasSgetrfBatchedFortran(handle, n, A, lda, ipiv, info, batch_count) & bind(c, name='hipblasSgetrfBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgetrfBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasSgetrfBatchedFortran = & hipblasSgetrfBatched(handle, n, A, lda, ipiv, info, batch_count) end function hipblasSgetrfBatchedFortran function hipblasDgetrfBatchedFortran(handle, n, A, lda, ipiv, info, batch_count) & bind(c, name='hipblasDgetrfBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgetrfBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasDgetrfBatchedFortran = & hipblasDgetrfBatched(handle, n, A, lda, ipiv, info, batch_count) end function hipblasDgetrfBatchedFortran function hipblasCgetrfBatchedFortran(handle, n, A, lda, ipiv, info, batch_count) & bind(c, name='hipblasCgetrfBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgetrfBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasCgetrfBatchedFortran = & hipblasCgetrfBatched(handle, n, A, lda, ipiv, info, batch_count) end function hipblasCgetrfBatchedFortran function hipblasZgetrfBatchedFortran(handle, n, A, lda, ipiv, info, batch_count) & bind(c, name='hipblasZgetrfBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgetrfBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasZgetrfBatchedFortran = & hipblasZgetrfBatched(handle, n, A, lda, ipiv, info, batch_count) end function hipblasZgetrfBatchedFortran ! 
getrf_strided_batched function hipblasSgetrfStridedBatchedFortran(handle, n, A, lda, stride_A, & ipiv, stride_P, info, batch_count) & bind(c, name='hipblasSgetrfStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgetrfStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: ipiv integer(c_int), value :: stride_P type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasSgetrfStridedBatchedFortran = & hipblasSgetrfStridedBatched(handle, n, A, lda, stride_A, & ipiv, stride_P, info, batch_count) end function hipblasSgetrfStridedBatchedFortran function hipblasDgetrfStridedBatchedFortran(handle, n, A, lda, stride_A, & ipiv, stride_P, info, batch_count) & bind(c, name='hipblasDgetrfStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgetrfStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: ipiv integer(c_int), value :: stride_P type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasDgetrfStridedBatchedFortran = & hipblasDgetrfStridedBatched(handle, n, A, lda, stride_A, & ipiv, stride_P, info, batch_count) end function hipblasDgetrfStridedBatchedFortran function hipblasCgetrfStridedBatchedFortran(handle, n, A, lda, stride_A, & ipiv, stride_P, info, batch_count) & bind(c, name='hipblasCgetrfStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgetrfStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: ipiv integer(c_int), value :: stride_P type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasCgetrfStridedBatchedFortran = & hipblasCgetrfStridedBatched(handle, n, A, lda, stride_A, & ipiv, stride_P, info, batch_count) end function hipblasCgetrfStridedBatchedFortran function hipblasZgetrfStridedBatchedFortran(handle, n, A, lda, stride_A, & ipiv, stride_P, info, batch_count) & bind(c, name='hipblasZgetrfStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgetrfStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: ipiv integer(c_int), value :: stride_P type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasZgetrfStridedBatchedFortran = & hipblasZgetrfStridedBatched(handle, n, A, lda, stride_A, & ipiv, stride_P, info, batch_count) end function hipblasZgetrfStridedBatchedFortran ! 
getrs function hipblasSgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, & B, ldb, info) & bind(c, name='hipblasSgetrsFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgetrsFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info hipblasSgetrsFortran = & hipblasSgetrs(handle, trans, n, nrhs, A, lda, & ipiv, B, ldb, info) end function hipblasSgetrsFortran function hipblasDgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, & B, ldb, info) & bind(c, name='hipblasDgetrsFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgetrsFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info hipblasDgetrsFortran = & hipblasDgetrs(handle, trans, n, nrhs, A, lda, & ipiv, B, ldb, info) end function hipblasDgetrsFortran function hipblasCgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, & B, ldb, info) & bind(c, name='hipblasCgetrsFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgetrsFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info hipblasCgetrsFortran = & hipblasCgetrs(handle, trans, n, nrhs, A, lda, & ipiv, B, ldb, info) end function hipblasCgetrsFortran function hipblasZgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, & B, ldb, info) & bind(c, name='hipblasZgetrsFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgetrsFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info hipblasZgetrsFortran = & hipblasZgetrs(handle, trans, n, nrhs, A, lda, & ipiv, B, ldb, info) end function hipblasZgetrsFortran ! 
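! Illustrative sketch only (not part of the original source): a hypothetical
! LU factorization and solve using the wrappers defined above. handle, dA,
! dIpiv, dB and dInfo are assumed to be c_ptr arguments the caller has already
! prepared (see the hipBLAS documentation for which of them must reference
! device memory).
!
!   integer(kind(HIPBLAS_STATUS_SUCCESS)) :: status
!   status = hipblasSgetrfFortran(handle, n, dA, lda, dIpiv, dInfo)
!   status = hipblasSgetrsFortran(handle, HIPBLAS_OP_N, n, nrhs, dA, lda, &
!                                 dIpiv, dB, ldb, dInfo)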
getrs_batched function hipblasSgetrsBatchedFortran(handle, trans, n, nrhs, A, lda, ipiv, & B, ldb, info, batch_count) & bind(c, name='hipblasSgetrsBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgetrsBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasSgetrsBatchedFortran = & hipblasSgetrsBatched(handle, trans, n, nrhs, A, lda, & ipiv, B, ldb, info, batch_count) end function hipblasSgetrsBatchedFortran function hipblasDgetrsBatchedFortran(handle, trans, n, nrhs, A, lda, ipiv, & B, ldb, info, batch_count) & bind(c, name='hipblasDgetrsBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgetrsBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasDgetrsBatchedFortran = & hipblasDgetrsBatched(handle, trans, n, nrhs, A, lda, & ipiv, B, ldb, info, batch_count) end function hipblasDgetrsBatchedFortran function hipblasCgetrsBatchedFortran(handle, trans, n, nrhs, A, lda, ipiv, & B, ldb, info, batch_count) & bind(c, name='hipblasCgetrsBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgetrsBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasCgetrsBatchedFortran = & hipblasCgetrsBatched(handle, trans, n, nrhs, A, lda, & ipiv, B, ldb, info, batch_count) end function hipblasCgetrsBatchedFortran function hipblasZgetrsBatchedFortran(handle, trans, n, nrhs, A, lda, ipiv, & B, ldb, info, batch_count) & bind(c, name='hipblasZgetrsBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgetrsBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasZgetrsBatchedFortran = & hipblasZgetrsBatched(handle, trans, n, nrhs, A, lda, & ipiv, B, ldb, info, batch_count) end function hipblasZgetrsBatchedFortran ! 
getrs_strided_batched function hipblasSgetrsStridedBatchedFortran(handle, trans, n, nrhs, A, lda, stride_A, ipiv, & stride_P, B, ldb, stride_B, info, batch_count) & bind(c, name='hipblasSgetrsStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgetrsStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: ipiv integer(c_int), value :: stride_P type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: stride_B type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasSgetrsStridedBatchedFortran = & hipblasSgetrsStridedBatched(handle, trans, n, nrhs, A, lda, stride_A, & ipiv, stride_P, B, ldb, stride_B, info, batch_count) end function hipblasSgetrsStridedBatchedFortran function hipblasDgetrsStridedBatchedFortran(handle, trans, n, nrhs, A, lda, stride_A, ipiv, & stride_P, B, ldb, stride_B, info, batch_count) & bind(c, name='hipblasDgetrsStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgetrsStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: ipiv integer(c_int), value :: stride_P type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: stride_B type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasDgetrsStridedBatchedFortran = & hipblasDgetrsStridedBatched(handle, trans, n, nrhs, A, lda, stride_A, & ipiv, stride_P, B, ldb, stride_B, info, batch_count) end function hipblasDgetrsStridedBatchedFortran function hipblasCgetrsStridedBatchedFortran(handle, trans, n, nrhs, A, lda, stride_A, ipiv, & stride_P, B, ldb, stride_B, info, batch_count) & bind(c, name='hipblasCgetrsStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgetrsStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: ipiv integer(c_int), value :: stride_P type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int), value :: stride_B type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasCgetrsStridedBatchedFortran = & hipblasCgetrsStridedBatched(handle, trans, n, nrhs, A, lda, stride_A, & ipiv, stride_P, B, ldb, stride_B, info, batch_count) end function hipblasCgetrsStridedBatchedFortran function hipblasZgetrsStridedBatchedFortran(handle, trans, n, nrhs, A, lda, stride_A, ipiv, & stride_P, B, ldb, stride_B, info, batch_count) & bind(c, name='hipblasZgetrsStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgetrsStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: ipiv integer(c_int), value :: stride_P type(c_ptr), value :: B integer(c_int), value :: 
ldb integer(c_int), value :: stride_B type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasZgetrsStridedBatchedFortran = & hipblasZgetrsStridedBatched(handle, trans, n, nrhs, A, lda, stride_A, & ipiv, stride_P, B, ldb, stride_B, info, batch_count) end function hipblasZgetrsStridedBatchedFortran ! getri_batched function hipblasSgetriBatchedFortran(handle, n, A, lda, ipiv, C, ldc, info, batch_count) & bind(c, name='hipblasSgetriBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgetriBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: C integer(c_int), value :: ldc type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasSgetriBatchedFortran = & hipblasSgetriBatched(handle, n, A, lda, ipiv, C, ldc, info, batch_count) end function hipblasSgetriBatchedFortran function hipblasDgetriBatchedFortran(handle, n, A, lda, ipiv, C, ldc, info, batch_count) & bind(c, name='hipblasDgetriBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgetriBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: C integer(c_int), value :: ldc type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasDgetriBatchedFortran = & hipblasDgetriBatched(handle, n, A, lda, ipiv, C, ldc, info, batch_count) end function hipblasDgetriBatchedFortran function hipblasCgetriBatchedFortran(handle, n, A, lda, ipiv, C, ldc, info, batch_count) & bind(c, name='hipblasCgetriBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgetriBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: C integer(c_int), value :: ldc type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasCgetriBatchedFortran = & hipblasCgetriBatched(handle, n, A, lda, ipiv, C, ldc, info, batch_count) end function hipblasCgetriBatchedFortran function hipblasZgetriBatchedFortran(handle, n, A, lda, ipiv, C, ldc, info, batch_count) & bind(c, name='hipblasZgetriBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgetriBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: C integer(c_int), value :: ldc type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasZgetriBatchedFortran = & hipblasZgetriBatched(handle, n, A, lda, ipiv, C, ldc, info, batch_count) end function hipblasZgetriBatchedFortran ! 
geqrf function hipblasSgeqrfFortran(handle, m, n, A, lda, tau, info) & bind(c, name='hipblasSgeqrfFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgeqrfFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: tau type(c_ptr), value :: info hipblasSgeqrfFortran = & hipblasSgeqrf(handle, m, n, A, lda, tau, info) end function hipblasSgeqrfFortran function hipblasDgeqrfFortran(handle, m, n, A, lda, tau, info) & bind(c, name='hipblasDgeqrfFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgeqrfFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: tau type(c_ptr), value :: info hipblasDgeqrfFortran = & hipblasDgeqrf(handle, m, n, A, lda, tau, info) end function hipblasDgeqrfFortran function hipblasCgeqrfFortran(handle, m, n, A, lda, tau, info) & bind(c, name='hipblasCgeqrfFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeqrfFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: tau type(c_ptr), value :: info hipblasCgeqrfFortran = & hipblasCgeqrf(handle, m, n, A, lda, tau, info) end function hipblasCgeqrfFortran function hipblasZgeqrfFortran(handle, m, n, A, lda, tau, info) & bind(c, name='hipblasZgeqrfFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeqrfFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: tau type(c_ptr), value :: info hipblasZgeqrfFortran = & hipblasZgeqrf(handle, m, n, A, lda, tau, info) end function hipblasZgeqrfFortran ! 
geqrf_batched function hipblasSgeqrfBatchedFortran(handle, m, n, A, lda, tau, info, batch_count) & bind(c, name='hipblasSgeqrfBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgeqrfBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: tau type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasSgeqrfBatchedFortran = & hipblasSgeqrfBatched(handle, m, n, A, lda, tau, info, batch_count) end function hipblasSgeqrfBatchedFortran function hipblasDgeqrfBatchedFortran(handle, m, n, A, lda, tau, info, batch_count) & bind(c, name='hipblasDgeqrfBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgeqrfBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: tau type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasDgeqrfBatchedFortran = & hipblasDgeqrfBatched(handle, m, n, A, lda, tau, info, batch_count) end function hipblasDgeqrfBatchedFortran function hipblasCgeqrfBatchedFortran(handle, m, n, A, lda, tau, info, batch_count) & bind(c, name='hipblasCgeqrfBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeqrfBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: tau type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasCgeqrfBatchedFortran = & hipblasCgeqrfBatched(handle, m, n, A, lda, tau, info, batch_count) end function hipblasCgeqrfBatchedFortran function hipblasZgeqrfBatchedFortran(handle, m, n, A, lda, tau, info, batch_count) & bind(c, name='hipblasZgeqrfBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeqrfBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: tau type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasZgeqrfBatchedFortran = & hipblasZgeqrfBatched(handle, m, n, A, lda, tau, info, batch_count) end function hipblasZgeqrfBatchedFortran ! 
geqrf_strided_batched function hipblasSgeqrfStridedBatchedFortran(handle, m, n, A, lda, stride_A, & tau, stride_T, info, batch_count) & bind(c, name='hipblasSgeqrfStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgeqrfStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: tau integer(c_int), value :: stride_T type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasSgeqrfStridedBatchedFortran = & hipblasSgeqrfStridedBatched(handle, m, n, A, lda, stride_A, & tau, stride_T, info, batch_count) end function hipblasSgeqrfStridedBatchedFortran function hipblasDgeqrfStridedBatchedFortran(handle, m, n, A, lda, stride_A, & tau, stride_T, info, batch_count) & bind(c, name='hipblasDgeqrfStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgeqrfStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: tau integer(c_int), value :: stride_T type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasDgeqrfStridedBatchedFortran = & hipblasDgeqrfStridedBatched(handle, m, n, A, lda, stride_A, & tau, stride_T, info, batch_count) end function hipblasDgeqrfStridedBatchedFortran function hipblasCgeqrfStridedBatchedFortran(handle, m, n, A, lda, stride_A, & tau, stride_T, info, batch_count) & bind(c, name='hipblasCgeqrfStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgeqrfStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: tau integer(c_int), value :: stride_T type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasCgeqrfStridedBatchedFortran = & hipblasCgeqrfStridedBatched(handle, m, n, A, lda, stride_A, & tau, stride_T, info, batch_count) end function hipblasCgeqrfStridedBatchedFortran function hipblasZgeqrfStridedBatchedFortran(handle, m, n, A, lda, stride_A, & tau, stride_T, info, batch_count) & bind(c, name='hipblasZgeqrfStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgeqrfStridedBatchedFortran type(c_ptr), value :: handle integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int), value :: stride_A type(c_ptr), value :: tau integer(c_int), value :: stride_T type(c_ptr), value :: info integer(c_int), value :: batch_count hipblasZgeqrfStridedBatchedFortran = & hipblasZgeqrfStridedBatched(handle, m, n, A, lda, stride_A, & tau, stride_T, info, batch_count) end function hipblasZgeqrfStridedBatchedFortran ! 
gels function hipblasSgelsFortran(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo) & bind(c, name='hipblasSgelsFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgelsFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info type(c_ptr), value :: deviceInfo hipblasSgelsFortran = & hipblasSgels(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo) end function hipblasSgelsFortran function hipblasDgelsFortran(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo) & bind(c, name='hipblasDgelsFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgelsFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info type(c_ptr), value :: deviceInfo hipblasDgelsFortran = & hipblasDgels(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo) end function hipblasDgelsFortran function hipblasCgelsFortran(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo) & bind(c, name='hipblasCgelsFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgelsFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info type(c_ptr), value :: deviceInfo hipblasCgelsFortran = & hipblasCgels(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo) end function hipblasCgelsFortran function hipblasZgelsFortran(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo) & bind(c, name='hipblasZgelsFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgelsFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info type(c_ptr), value :: deviceInfo hipblasZgelsFortran = & hipblasZgels(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo) end function hipblasZgelsFortran ! 
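! Illustrative sketch only (not part of the original source): a hypothetical
! least-squares solve through the wrapper defined above. handle, dA, dB, info
! and dInfo are assumed to be c_ptr arguments the caller has already prepared,
! with dA and dB referencing device memory.
!
!   status = hipblasSgelsFortran(handle, HIPBLAS_OP_N, m, n, nrhs, dA, lda, &
!                                dB, ldb, info, dInfo)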
gelsBatched function hipblasSgelsBatchedFortran(handle, trans, m, n, nrhs, A, & lda, B, ldb, info, deviceInfo, batchCount) & bind(c, name = 'hipblasSgelsBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgelsBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info type(c_ptr), value :: deviceInfo integer(c_int), value :: batchCount hipblasSgelsBatchedFortran = & hipblasSgelsBatched(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount) end function hipblasSgelsBatchedFortran function hipblasDgelsBatchedFortran(handle, trans, m, n, nrhs, A, & lda, B, ldb, info, deviceInfo, batchCount) & bind(c, name = 'hipblasDgelsBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgelsBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info type(c_ptr), value :: deviceInfo integer(c_int), value :: batchCount hipblasDgelsBatchedFortran = & hipblasDgelsBatched(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount) end function hipblasDgelsBatchedFortran function hipblasCgelsBatchedFortran(handle, trans, m, n, nrhs, A, & lda, B, ldb, info, deviceInfo, batchCount) & bind(c, name = 'hipblasCgelsBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgelsBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info type(c_ptr), value :: deviceInfo integer(c_int), value :: batchCount hipblasCgelsBatchedFortran = & hipblasCgelsBatched(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount) end function hipblasCgelsBatchedFortran function hipblasZgelsBatchedFortran(handle, trans, m, n, nrhs, A, & lda, B, ldb, info, deviceInfo, batchCount) & bind(c, name = 'hipblasZgelsBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgelsBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: info type(c_ptr), value :: deviceInfo integer(c_int), value :: batchCount hipblasZgelsBatchedFortran = & hipblasZgelsBatched(handle, trans, m, n, nrhs, A, lda, B, ldb, info, deviceInfo, batchCount) end function hipblasZgelsBatchedFortran ! 
gelsStridedBatched function hipblasSgelsStridedBatchedFortran(handle, trans, m, n, nrhs, A, & lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount) & bind(c, name = 'hipblasSgelsStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasSgelsStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: strideA type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: strideB type(c_ptr), value :: info type(c_ptr), value :: deviceInfo integer(c_int), value :: batchCount hipblasSgelsStridedBatchedFortran = & hipblasSgelsStridedBatched(handle, trans, m, n, nrhs, A, lda, strideA, & B, ldb, strideB, info, deviceInfo, batchCount) end function hipblasSgelsStridedBatchedFortran function hipblasDgelsStridedBatchedFortran(handle, trans, m, n, nrhs, A, & lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount) & bind(c, name = 'hipblasDgelsStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasDgelsStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: strideA type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: strideB type(c_ptr), value :: info type(c_ptr), value :: deviceInfo integer(c_int), value :: batchCount hipblasDgelsStridedBatchedFortran = & hipblasDgelsStridedBatched(handle, trans, m, n, nrhs, A, lda, strideA, & B, ldb, strideB, info, deviceInfo, batchCount) end function hipblasDgelsStridedBatchedFortran function hipblasCgelsStridedBatchedFortran(handle, trans, m, n, nrhs, A, & lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount) & bind(c, name = 'hipblasCgelsStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasCgelsStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: strideA type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: strideB type(c_ptr), value :: info type(c_ptr), value :: deviceInfo integer(c_int), value :: batchCount hipblasCgelsStridedBatchedFortran = & hipblasCgelsStridedBatched(handle, trans, m, n, nrhs, A, lda, strideA, & B, ldb, strideB, info, deviceInfo, batchCount) end function hipblasCgelsStridedBatchedFortran function hipblasZgelsStridedBatchedFortran(handle, trans, m, n, nrhs, A, & lda, strideA, B, ldb, strideB, info, deviceInfo, batchCount) & bind(c, name = 'hipblasZgelsStridedBatchedFortran') use iso_c_binding use hipblas_enums implicit none integer(kind(HIPBLAS_STATUS_SUCCESS)) :: hipblasZgelsStridedBatchedFortran type(c_ptr), value :: handle integer(kind(HIPBLAS_OP_N)), value :: trans integer(c_int), value :: m integer(c_int), value :: n integer(c_int), value :: nrhs type(c_ptr), value :: A integer(c_int), value :: lda integer(c_int64_t), value :: strideA type(c_ptr), value :: B integer(c_int), value :: ldb integer(c_int64_t), value :: strideB type(c_ptr), value :: 
info type(c_ptr), value :: deviceInfo integer(c_int), value :: batchCount hipblasZgelsStridedBatchedFortran = & hipblasZgelsStridedBatched(handle, trans, m, n, nrhs, A, lda, strideA, & B, ldb, strideB, info, deviceInfo, batchCount) end function hipblasZgelsStridedBatchedFortran end module hipblas_interface hipBLAS-rocm-5.5.1/clients/include/hipblas_no_fortran.hpp000066400000000000000000001101031434647641600233630ustar00rootroot00000000000000 /* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef _HIPBLAS_NO_FORTRAN_HPP #define _HIPBLAS_NO_FORTRAN_HPP /*!\file * This file interfaces with our Fortran BLAS interface. 
*/ /* * ============================================================================ * Fortran functions * ============================================================================ */ /* ========== * Aux * ========== */ #define hipblasSetVectorFortran hipblasSetVector #define hipblasGetVectorFortran hipblasGetVector #define hipblasSetMatrixFortran hipblasSetMatrix #define hipblasGetMatrixFortran hipblasGetMatrix #define hipblasSetVectorAsyncFortran hipblasSetVectorAsync #define hipblasGetVectorAsyncFortran hipblasGetVectorAsync #define hipblasSetMatrixAsyncFortran hipblasSetMatrixAsync #define hipblasGetMatrixAsyncFortran hipblasGetMatrixAsync #define hipblasSetAtomicsModeFortran hipblasSetAtomicsMode #define hipblasGetAtomicsModeFortran hipblasGetAtomicsMode /* ========== * L1 * ========== */ #define hipblasSscalFortran hipblasSscal #define hipblasDscalFortran hipblasDscal #define hipblasCscalFortran hipblasCscal #define hipblasZscalFortran hipblasZscal #define hipblasCsscalFortran hipblasCsscal #define hipblasZdscalFortran hipblasZdscal #define hipblasSscalBatchedFortran hipblasSscalBatched #define hipblasDscalBatchedFortran hipblasDscalBatched #define hipblasCscalBatchedFortran hipblasCscalBatched #define hipblasZscalBatchedFortran hipblasZscalBatched #define hipblasCsscalBatchedFortran hipblasCsscalBatched #define hipblasZdscalBatchedFortran hipblasZdscalBatched #define hipblasSscalStridedBatchedFortran hipblasSscalStridedBatched #define hipblasDscalStridedBatchedFortran hipblasDscalStridedBatched #define hipblasCscalStridedBatchedFortran hipblasCscalStridedBatched #define hipblasZscalStridedBatchedFortran hipblasZscalStridedBatched #define hipblasCsscalStridedBatchedFortran hipblasCsscalStridedBatched #define hipblasZdscalStridedBatchedFortran hipblasZdscalStridedBatched #define hipblasScopyFortran hipblasScopy #define hipblasDcopyFortran hipblasDcopy #define hipblasCcopyFortran hipblasCcopy #define hipblasZcopyFortran hipblasZcopy #define hipblasScopyBatchedFortran hipblasScopyBatched #define hipblasDcopyBatchedFortran hipblasDcopyBatched #define hipblasCcopyBatchedFortran hipblasCcopyBatched #define hipblasZcopyBatchedFortran hipblasZcopyBatched #define hipblasScopyStridedBatchedFortran hipblasScopyStridedBatched #define hipblasDcopyStridedBatchedFortran hipblasDcopyStridedBatched #define hipblasCcopyStridedBatchedFortran hipblasCcopyStridedBatched #define hipblasZcopyStridedBatchedFortran hipblasZcopyStridedBatched #define hipblasSdotFortran hipblasSdot #define hipblasDdotFortran hipblasDdot #define hipblasHdotFortran hipblasHdot #define hipblasBfdotFortran hipblasBfdot #define hipblasCdotuFortran hipblasCdotu #define hipblasCdotcFortran hipblasCdotc #define hipblasZdotuFortran hipblasZdotu #define hipblasZdotcFortran hipblasZdotc #define hipblasSdotBatchedFortran hipblasSdotBatched #define hipblasDdotBatchedFortran hipblasDdotBatched #define hipblasHdotBatchedFortran hipblasHdotBatched #define hipblasBfdotBatchedFortran hipblasBfdotBatched #define hipblasCdotuBatchedFortran hipblasCdotuBatched #define hipblasCdotcBatchedFortran hipblasCdotcBatched #define hipblasZdotuBatchedFortran hipblasZdotuBatched #define hipblasZdotcBatchedFortran hipblasZdotcBatched #define hipblasSdotStridedBatchedFortran hipblasSdotStridedBatched #define hipblasDdotStridedBatchedFortran hipblasDdotStridedBatched #define hipblasHdotStridedBatchedFortran hipblasHdotStridedBatched #define hipblasBfdotStridedBatchedFortran hipblasBfdotStridedBatched #define hipblasCdotuStridedBatchedFortran 
hipblasCdotuStridedBatched #define hipblasCdotcStridedBatchedFortran hipblasCdotcStridedBatched #define hipblasZdotuStridedBatchedFortran hipblasZdotuStridedBatched #define hipblasZdotcStridedBatchedFortran hipblasZdotcStridedBatched #define hipblasSswapFortran hipblasSswap #define hipblasDswapFortran hipblasDswap #define hipblasCswapFortran hipblasCswap #define hipblasZswapFortran hipblasZswap #define hipblasSswapBatchedFortran hipblasSswapBatched #define hipblasDswapBatchedFortran hipblasDswapBatched #define hipblasCswapBatchedFortran hipblasCswapBatched #define hipblasZswapBatchedFortran hipblasZswapBatched #define hipblasSswapStridedBatchedFortran hipblasSswapStridedBatched #define hipblasDswapStridedBatchedFortran hipblasDswapStridedBatched #define hipblasCswapStridedBatchedFortran hipblasCswapStridedBatched #define hipblasZswapStridedBatchedFortran hipblasZswapStridedBatched #define hipblasHaxpyFortran hipblasHaxpy #define hipblasSaxpyFortran hipblasSaxpy #define hipblasDaxpyFortran hipblasDaxpy #define hipblasCaxpyFortran hipblasCaxpy #define hipblasZaxpyFortran hipblasZaxpy #define hipblasHaxpyBatchedFortran hipblasHaxpyBatched #define hipblasSaxpyBatchedFortran hipblasSaxpyBatched #define hipblasDaxpyBatchedFortran hipblasDaxpyBatched #define hipblasCaxpyBatchedFortran hipblasCaxpyBatched #define hipblasZaxpyBatchedFortran hipblasZaxpyBatched #define hipblasHaxpyStridedBatchedFortran hipblasHaxpyStridedBatched #define hipblasSaxpyStridedBatchedFortran hipblasSaxpyStridedBatched #define hipblasDaxpyStridedBatchedFortran hipblasDaxpyStridedBatched #define hipblasCaxpyStridedBatchedFortran hipblasCaxpyStridedBatched #define hipblasZaxpyStridedBatchedFortran hipblasZaxpyStridedBatched #define hipblasSasumFortran hipblasSasum #define hipblasDasumFortran hipblasDasum #define hipblasScasumFortran hipblasScasum #define hipblasDzasumFortran hipblasDzasum #define hipblasSasumBatchedFortran hipblasSasumBatched #define hipblasDasumBatchedFortran hipblasDasumBatched #define hipblasScasumBatchedFortran hipblasScasumBatched #define hipblasDzasumBatchedFortran hipblasDzasumBatched #define hipblasSasumStridedBatchedFortran hipblasSasumStridedBatched #define hipblasDasumStridedBatchedFortran hipblasDasumStridedBatched #define hipblasScasumStridedBatchedFortran hipblasScasumStridedBatched #define hipblasDzasumStridedBatchedFortran hipblasDzasumStridedBatched #define hipblasSnrm2Fortran hipblasSnrm2 #define hipblasDnrm2Fortran hipblasDnrm2 #define hipblasScnrm2Fortran hipblasScnrm2 #define hipblasDznrm2Fortran hipblasDznrm2 #define hipblasSnrm2BatchedFortran hipblasSnrm2Batched #define hipblasDnrm2BatchedFortran hipblasDnrm2Batched #define hipblasScnrm2BatchedFortran hipblasScnrm2Batched #define hipblasDznrm2BatchedFortran hipblasDznrm2Batched #define hipblasSnrm2StridedBatchedFortran hipblasSnrm2StridedBatched #define hipblasDnrm2StridedBatchedFortran hipblasDnrm2StridedBatched #define hipblasScnrm2StridedBatchedFortran hipblasScnrm2StridedBatched #define hipblasDznrm2StridedBatchedFortran hipblasDznrm2StridedBatched #define hipblasIsamaxFortran hipblasIsamax #define hipblasIdamaxFortran hipblasIdamax #define hipblasIcamaxFortran hipblasIcamax #define hipblasIzamaxFortran hipblasIzamax #define hipblasIsamaxBatchedFortran hipblasIsamaxBatched #define hipblasIdamaxBatchedFortran hipblasIdamaxBatched #define hipblasIcamaxBatchedFortran hipblasIcamaxBatched #define hipblasIzamaxBatchedFortran hipblasIzamaxBatched #define hipblasIsamaxStridedBatchedFortran hipblasIsamaxStridedBatched #define 
hipblasIdamaxStridedBatchedFortran hipblasIdamaxStridedBatched #define hipblasIcamaxStridedBatchedFortran hipblasIcamaxStridedBatched #define hipblasIzamaxStridedBatchedFortran hipblasIzamaxStridedBatched #define hipblasIsaminFortran hipblasIsamin #define hipblasIdaminFortran hipblasIdamin #define hipblasIcaminFortran hipblasIcamin #define hipblasIzaminFortran hipblasIzamin #define hipblasIsaminBatchedFortran hipblasIsaminBatched #define hipblasIdaminBatchedFortran hipblasIdaminBatched #define hipblasIcaminBatchedFortran hipblasIcaminBatched #define hipblasIzaminBatchedFortran hipblasIzaminBatched #define hipblasIsaminStridedBatchedFortran hipblasIsaminStridedBatched #define hipblasIdaminStridedBatchedFortran hipblasIdaminStridedBatched #define hipblasIcaminStridedBatchedFortran hipblasIcaminStridedBatched #define hipblasIzaminStridedBatchedFortran hipblasIzaminStridedBatched #define hipblasSrotFortran hipblasSrot #define hipblasDrotFortran hipblasDrot #define hipblasCsrotFortran hipblasCsrot #define hipblasZdrotFortran hipblasZdrot #define hipblasCrotFortran hipblasCrot #define hipblasZrotFortran hipblasZrot #define hipblasSrotBatchedFortran hipblasSrotBatched #define hipblasDrotBatchedFortran hipblasDrotBatched #define hipblasCsrotBatchedFortran hipblasCsrotBatched #define hipblasZdrotBatchedFortran hipblasZdrotBatched #define hipblasCrotBatchedFortran hipblasCrotBatched #define hipblasZrotBatchedFortran hipblasZrotBatched #define hipblasSrotStridedBatchedFortran hipblasSrotStridedBatched #define hipblasDrotStridedBatchedFortran hipblasDrotStridedBatched #define hipblasCsrotStridedBatchedFortran hipblasCsrotStridedBatched #define hipblasZdrotStridedBatchedFortran hipblasZdrotStridedBatched #define hipblasCrotStridedBatchedFortran hipblasCrotStridedBatched #define hipblasZrotStridedBatchedFortran hipblasZrotStridedBatched #define hipblasSrotgFortran hipblasSrotg #define hipblasDrotgFortran hipblasDrotg #define hipblasCrotgFortran hipblasCrotg #define hipblasZrotgFortran hipblasZrotg #define hipblasSrotgBatchedFortran hipblasSrotgBatched #define hipblasDrotgBatchedFortran hipblasDrotgBatched #define hipblasCrotgBatchedFortran hipblasCrotgBatched #define hipblasZrotgBatchedFortran hipblasZrotgBatched #define hipblasSrotgStridedBatchedFortran hipblasSrotgStridedBatched #define hipblasDrotgStridedBatchedFortran hipblasDrotgStridedBatched #define hipblasCrotgStridedBatchedFortran hipblasCrotgStridedBatched #define hipblasZrotgStridedBatchedFortran hipblasZrotgStridedBatched #define hipblasSrotmFortran hipblasSrotm #define hipblasDrotmFortran hipblasDrotm #define hipblasSrotmBatchedFortran hipblasSrotmBatched #define hipblasDrotmBatchedFortran hipblasDrotmBatched #define hipblasSrotmStridedBatchedFortran hipblasSrotmStridedBatched #define hipblasDrotmStridedBatchedFortran hipblasDrotmStridedBatched #define hipblasSrotmgFortran hipblasSrotmg #define hipblasDrotmgFortran hipblasDrotmg #define hipblasSrotmgBatchedFortran hipblasSrotmgBatched #define hipblasDrotmgBatchedFortran hipblasDrotmgBatched #define hipblasSrotmgStridedBatchedFortran hipblasSrotmgStridedBatched #define hipblasDrotmgStridedBatchedFortran hipblasDrotmgStridedBatched /* ========== * L2 * ========== */ #define hipblasSgerFortran hipblasSger #define hipblasDgerFortran hipblasDger #define hipblasCgeruFortran hipblasCgeru #define hipblasZgeruFortran hipblasZgeru #define hipblasCgercFortran hipblasCgerc #define hipblasZgercFortran hipblasZgerc #define hipblasSgerBatchedFortran hipblasSgerBatched #define hipblasDgerBatchedFortran 
hipblasDgerBatched #define hipblasCgeruBatchedFortran hipblasCgeruBatched #define hipblasZgeruBatchedFortran hipblasZgeruBatched #define hipblasCgercBatchedFortran hipblasCgercBatched #define hipblasZgercBatchedFortran hipblasZgercBatched #define hipblasSgerStridedBatchedFortran hipblasSgerStridedBatched #define hipblasDgerStridedBatchedFortran hipblasDgerStridedBatched #define hipblasCgeruStridedBatchedFortran hipblasCgeruStridedBatched #define hipblasZgeruStridedBatchedFortran hipblasZgeruStridedBatched #define hipblasCgercStridedBatchedFortran hipblasCgercStridedBatched #define hipblasZgercStridedBatchedFortran hipblasZgercStridedBatched #define hipblasChbmvFortran hipblasChbmv #define hipblasZhbmvFortran hipblasZhbmv #define hipblasChbmvBatchedFortran hipblasChbmvBatched #define hipblasZhbmvBatchedFortran hipblasZhbmvBatched #define hipblasChbmvStridedBatchedFortran hipblasChbmvStridedBatched #define hipblasZhbmvStridedBatchedFortran hipblasZhbmvStridedBatched #define hipblasChemvFortran hipblasChemv #define hipblasZhemvFortran hipblasZhemv #define hipblasChemvBatchedFortran hipblasChemvBatched #define hipblasZhemvBatchedFortran hipblasZhemvBatched #define hipblasChemvStridedBatchedFortran hipblasChemvStridedBatched #define hipblasZhemvStridedBatchedFortran hipblasZhemvStridedBatched #define hipblasCherFortran hipblasCher #define hipblasZherFortran hipblasZher #define hipblasCherBatchedFortran hipblasCherBatched #define hipblasZherBatchedFortran hipblasZherBatched #define hipblasCherStridedBatchedFortran hipblasCherStridedBatched #define hipblasZherStridedBatchedFortran hipblasZherStridedBatched #define hipblasCher2Fortran hipblasCher2 #define hipblasZher2Fortran hipblasZher2 #define hipblasCher2BatchedFortran hipblasCher2Batched #define hipblasZher2BatchedFortran hipblasZher2Batched #define hipblasCher2StridedBatchedFortran hipblasCher2StridedBatched #define hipblasZher2StridedBatchedFortran hipblasZher2StridedBatched #define hipblasChpmvFortran hipblasChpmv #define hipblasZhpmvFortran hipblasZhpmv #define hipblasChpmvBatchedFortran hipblasChpmvBatched #define hipblasZhpmvBatchedFortran hipblasZhpmvBatched #define hipblasChpmvStridedBatchedFortran hipblasChpmvStridedBatched #define hipblasZhpmvStridedBatchedFortran hipblasZhpmvStridedBatched #define hipblasChprFortran hipblasChpr #define hipblasZhprFortran hipblasZhpr #define hipblasChprBatchedFortran hipblasChprBatched #define hipblasZhprBatchedFortran hipblasZhprBatched #define hipblasChprStridedBatchedFortran hipblasChprStridedBatched #define hipblasZhprStridedBatchedFortran hipblasZhprStridedBatched #define hipblasChpr2Fortran hipblasChpr2 #define hipblasZhpr2Fortran hipblasZhpr2 #define hipblasChpr2BatchedFortran hipblasChpr2Batched #define hipblasZhpr2BatchedFortran hipblasZhpr2Batched #define hipblasChpr2StridedBatchedFortran hipblasChpr2StridedBatched #define hipblasZhpr2StridedBatchedFortran hipblasZhpr2StridedBatched #define hipblasSsbmvFortran hipblasSsbmv #define hipblasDsbmvFortran hipblasDsbmv #define hipblasSsbmvBatchedFortran hipblasSsbmvBatched #define hipblasDsbmvBatchedFortran hipblasDsbmvBatched #define hipblasSsbmvStridedBatchedFortran hipblasSsbmvStridedBatched #define hipblasDsbmvStridedBatchedFortran hipblasDsbmvStridedBatched #define hipblasSspmvFortran hipblasSspmv #define hipblasDspmvFortran hipblasDspmv #define hipblasSspmvBatchedFortran hipblasSspmvBatched #define hipblasDspmvBatchedFortran hipblasDspmvBatched #define hipblasSspmvStridedBatchedFortran hipblasSspmvStridedBatched #define 
hipblasDspmvStridedBatchedFortran hipblasDspmvStridedBatched #define hipblasSsprFortran hipblasSspr #define hipblasDsprFortran hipblasDspr #define hipblasCsprFortran hipblasCspr #define hipblasZsprFortran hipblasZspr #define hipblasSsprBatchedFortran hipblasSsprBatched #define hipblasDsprBatchedFortran hipblasDsprBatched #define hipblasCsprBatchedFortran hipblasCsprBatched #define hipblasZsprBatchedFortran hipblasZsprBatched #define hipblasSsprStridedBatchedFortran hipblasSsprStridedBatched #define hipblasDsprStridedBatchedFortran hipblasDsprStridedBatched #define hipblasCsprStridedBatchedFortran hipblasCsprStridedBatched #define hipblasZsprStridedBatchedFortran hipblasZsprStridedBatched #define hipblasSspr2Fortran hipblasSspr2 #define hipblasDspr2Fortran hipblasDspr2 #define hipblasSspr2BatchedFortran hipblasSspr2Batched #define hipblasDspr2BatchedFortran hipblasDspr2Batched #define hipblasSspr2StridedBatchedFortran hipblasSspr2StridedBatched #define hipblasDspr2StridedBatchedFortran hipblasDspr2StridedBatched #define hipblasSsymvFortran hipblasSsymv #define hipblasDsymvFortran hipblasDsymv #define hipblasCsymvFortran hipblasCsymv #define hipblasZsymvFortran hipblasZsymv #define hipblasSsymvBatchedFortran hipblasSsymvBatched #define hipblasDsymvBatchedFortran hipblasDsymvBatched #define hipblasCsymvBatchedFortran hipblasCsymvBatched #define hipblasZsymvBatchedFortran hipblasZsymvBatched #define hipblasSsymvStridedBatchedFortran hipblasSsymvStridedBatched #define hipblasDsymvStridedBatchedFortran hipblasDsymvStridedBatched #define hipblasCsymvStridedBatchedFortran hipblasCsymvStridedBatched #define hipblasZsymvStridedBatchedFortran hipblasZsymvStridedBatched #define hipblasSsyrFortran hipblasSsyr #define hipblasDsyrFortran hipblasDsyr #define hipblasCsyrFortran hipblasCsyr #define hipblasZsyrFortran hipblasZsyr #define hipblasSsyrBatchedFortran hipblasSsyrBatched #define hipblasDsyrBatchedFortran hipblasDsyrBatched #define hipblasCsyrBatchedFortran hipblasCsyrBatched #define hipblasZsyrBatchedFortran hipblasZsyrBatched #define hipblasSsyrStridedBatchedFortran hipblasSsyrStridedBatched #define hipblasDsyrStridedBatchedFortran hipblasDsyrStridedBatched #define hipblasCsyrStridedBatchedFortran hipblasCsyrStridedBatched #define hipblasZsyrStridedBatchedFortran hipblasZsyrStridedBatched #define hipblasSsyr2Fortran hipblasSsyr2 #define hipblasDsyr2Fortran hipblasDsyr2 #define hipblasCsyr2Fortran hipblasCsyr2 #define hipblasZsyr2Fortran hipblasZsyr2 #define hipblasSsyr2BatchedFortran hipblasSsyr2Batched #define hipblasDsyr2BatchedFortran hipblasDsyr2Batched #define hipblasCsyr2BatchedFortran hipblasCsyr2Batched #define hipblasZsyr2BatchedFortran hipblasZsyr2Batched #define hipblasSsyr2StridedBatchedFortran hipblasSsyr2StridedBatched #define hipblasDsyr2StridedBatchedFortran hipblasDsyr2StridedBatched #define hipblasCsyr2StridedBatchedFortran hipblasCsyr2StridedBatched #define hipblasZsyr2StridedBatchedFortran hipblasZsyr2StridedBatched #define hipblasStbmvFortran hipblasStbmv #define hipblasDtbmvFortran hipblasDtbmv #define hipblasCtbmvFortran hipblasCtbmv #define hipblasZtbmvFortran hipblasZtbmv #define hipblasStbmvBatchedFortran hipblasStbmvBatched #define hipblasDtbmvBatchedFortran hipblasDtbmvBatched #define hipblasCtbmvBatchedFortran hipblasCtbmvBatched #define hipblasZtbmvBatchedFortran hipblasZtbmvBatched #define hipblasStbmvStridedBatchedFortran hipblasStbmvStridedBatched #define hipblasDtbmvStridedBatchedFortran hipblasDtbmvStridedBatched #define hipblasCtbmvStridedBatchedFortran 
hipblasCtbmvStridedBatched #define hipblasZtbmvStridedBatchedFortran hipblasZtbmvStridedBatched #define hipblasStbsvFortran hipblasStbsv #define hipblasDtbsvFortran hipblasDtbsv #define hipblasCtbsvFortran hipblasCtbsv #define hipblasZtbsvFortran hipblasZtbsv #define hipblasStbsvBatchedFortran hipblasStbsvBatched #define hipblasDtbsvBatchedFortran hipblasDtbsvBatched #define hipblasCtbsvBatchedFortran hipblasCtbsvBatched #define hipblasZtbsvBatchedFortran hipblasZtbsvBatched #define hipblasStbsvStridedBatchedFortran hipblasStbsvStridedBatched #define hipblasDtbsvStridedBatchedFortran hipblasDtbsvStridedBatched #define hipblasCtbsvStridedBatchedFortran hipblasCtbsvStridedBatched #define hipblasZtbsvStridedBatchedFortran hipblasZtbsvStridedBatched #define hipblasStpmvFortran hipblasStpmv #define hipblasDtpmvFortran hipblasDtpmv #define hipblasCtpmvFortran hipblasCtpmv #define hipblasZtpmvFortran hipblasZtpmv #define hipblasStpmvBatchedFortran hipblasStpmvBatched #define hipblasDtpmvBatchedFortran hipblasDtpmvBatched #define hipblasCtpmvBatchedFortran hipblasCtpmvBatched #define hipblasZtpmvBatchedFortran hipblasZtpmvBatched #define hipblasStpmvStridedBatchedFortran hipblasStpmvStridedBatched #define hipblasDtpmvStridedBatchedFortran hipblasDtpmvStridedBatched #define hipblasCtpmvStridedBatchedFortran hipblasCtpmvStridedBatched #define hipblasZtpmvStridedBatchedFortran hipblasZtpmvStridedBatched #define hipblasStpsvFortran hipblasStpsv #define hipblasDtpsvFortran hipblasDtpsv #define hipblasCtpsvFortran hipblasCtpsv #define hipblasZtpsvFortran hipblasZtpsv #define hipblasStpsvBatchedFortran hipblasStpsvBatched #define hipblasDtpsvBatchedFortran hipblasDtpsvBatched #define hipblasCtpsvBatchedFortran hipblasCtpsvBatched #define hipblasZtpsvBatchedFortran hipblasZtpsvBatched #define hipblasStpsvStridedBatchedFortran hipblasStpsvStridedBatched #define hipblasDtpsvStridedBatchedFortran hipblasDtpsvStridedBatched #define hipblasCtpsvStridedBatchedFortran hipblasCtpsvStridedBatched #define hipblasZtpsvStridedBatchedFortran hipblasZtpsvStridedBatched #define hipblasStrmvFortran hipblasStrmv #define hipblasDtrmvFortran hipblasDtrmv #define hipblasCtrmvFortran hipblasCtrmv #define hipblasZtrmvFortran hipblasZtrmv #define hipblasStrmvBatchedFortran hipblasStrmvBatched #define hipblasDtrmvBatchedFortran hipblasDtrmvBatched #define hipblasCtrmvBatchedFortran hipblasCtrmvBatched #define hipblasZtrmvBatchedFortran hipblasZtrmvBatched #define hipblasStrmvStridedBatchedFortran hipblasStrmvStridedBatched #define hipblasDtrmvStridedBatchedFortran hipblasDtrmvStridedBatched #define hipblasCtrmvStridedBatchedFortran hipblasCtrmvStridedBatched #define hipblasZtrmvStridedBatchedFortran hipblasZtrmvStridedBatched #define hipblasStrsvFortran hipblasStrsv #define hipblasDtrsvFortran hipblasDtrsv #define hipblasCtrsvFortran hipblasCtrsv #define hipblasZtrsvFortran hipblasZtrsv #define hipblasStrsvBatchedFortran hipblasStrsvBatched #define hipblasDtrsvBatchedFortran hipblasDtrsvBatched #define hipblasCtrsvBatchedFortran hipblasCtrsvBatched #define hipblasZtrsvBatchedFortran hipblasZtrsvBatched #define hipblasStrsvStridedBatchedFortran hipblasStrsvStridedBatched #define hipblasDtrsvStridedBatchedFortran hipblasDtrsvStridedBatched #define hipblasCtrsvStridedBatchedFortran hipblasCtrsvStridedBatched #define hipblasZtrsvStridedBatchedFortran hipblasZtrsvStridedBatched #define hipblasSgbmvFortran hipblasSgbmv #define hipblasDgbmvFortran hipblasDgbmv #define hipblasCgbmvFortran hipblasCgbmv #define hipblasZgbmvFortran 
hipblasZgbmv #define hipblasSgbmvBatchedFortran hipblasSgbmvBatched #define hipblasDgbmvBatchedFortran hipblasDgbmvBatched #define hipblasCgbmvBatchedFortran hipblasCgbmvBatched #define hipblasZgbmvBatchedFortran hipblasZgbmvBatched #define hipblasSgbmvStridedBatchedFortran hipblasSgbmvStridedBatched #define hipblasDgbmvStridedBatchedFortran hipblasDgbmvStridedBatched #define hipblasCgbmvStridedBatchedFortran hipblasCgbmvStridedBatched #define hipblasZgbmvStridedBatchedFortran hipblasZgbmvStridedBatched #define hipblasSgemvFortran hipblasSgemv #define hipblasDgemvFortran hipblasDgemv #define hipblasCgemvFortran hipblasCgemv #define hipblasZgemvFortran hipblasZgemv #define hipblasSgemvBatchedFortran hipblasSgemvBatched #define hipblasDgemvBatchedFortran hipblasDgemvBatched #define hipblasCgemvBatchedFortran hipblasCgemvBatched #define hipblasZgemvBatchedFortran hipblasZgemvBatched #define hipblasSgemvStridedBatchedFortran hipblasSgemvStridedBatched #define hipblasDgemvStridedBatchedFortran hipblasDgemvStridedBatched #define hipblasCgemvStridedBatchedFortran hipblasCgemvStridedBatched #define hipblasZgemvStridedBatchedFortran hipblasZgemvStridedBatched /* ========== * L3 * ========== */ #define hipblasCherkFortran hipblasCherk #define hipblasZherkFortran hipblasZherk #define hipblasCherkBatchedFortran hipblasCherkBatched #define hipblasZherkBatchedFortran hipblasZherkBatched #define hipblasCherkStridedBatchedFortran hipblasCherkStridedBatched #define hipblasZherkStridedBatchedFortran hipblasZherkStridedBatched #define hipblasCher2kFortran hipblasCher2k #define hipblasZher2kFortran hipblasZher2k #define hipblasCher2kBatchedFortran hipblasCher2kBatched #define hipblasZher2kBatchedFortran hipblasZher2kBatched #define hipblasCher2kStridedBatchedFortran hipblasCher2kStridedBatched #define hipblasZher2kStridedBatchedFortran hipblasZher2kStridedBatched #define hipblasCherkxFortran hipblasCherkx #define hipblasZherkxFortran hipblasZherkx #define hipblasCherkxBatchedFortran hipblasCherkxBatched #define hipblasZherkxBatchedFortran hipblasZherkxBatched #define hipblasCherkxStridedBatchedFortran hipblasCherkxStridedBatched #define hipblasZherkxStridedBatchedFortran hipblasZherkxStridedBatched #define hipblasSsymmFortran hipblasSsymm #define hipblasDsymmFortran hipblasDsymm #define hipblasCsymmFortran hipblasCsymm #define hipblasZsymmFortran hipblasZsymm #define hipblasSsymmBatchedFortran hipblasSsymmBatched #define hipblasDsymmBatchedFortran hipblasDsymmBatched #define hipblasCsymmBatchedFortran hipblasCsymmBatched #define hipblasZsymmBatchedFortran hipblasZsymmBatched #define hipblasSsymmStridedBatchedFortran hipblasSsymmStridedBatched #define hipblasDsymmStridedBatchedFortran hipblasDsymmStridedBatched #define hipblasCsymmStridedBatchedFortran hipblasCsymmStridedBatched #define hipblasZsymmStridedBatchedFortran hipblasZsymmStridedBatched #define hipblasSsyrkFortran hipblasSsyrk #define hipblasDsyrkFortran hipblasDsyrk #define hipblasCsyrkFortran hipblasCsyrk #define hipblasZsyrkFortran hipblasZsyrk #define hipblasSsyrkBatchedFortran hipblasSsyrkBatched #define hipblasDsyrkBatchedFortran hipblasDsyrkBatched #define hipblasCsyrkBatchedFortran hipblasCsyrkBatched #define hipblasZsyrkBatchedFortran hipblasZsyrkBatched #define hipblasSsyrkStridedBatchedFortran hipblasSsyrkStridedBatched #define hipblasDsyrkStridedBatchedFortran hipblasDsyrkStridedBatched #define hipblasCsyrkStridedBatchedFortran hipblasCsyrkStridedBatched #define hipblasZsyrkStridedBatchedFortran hipblasZsyrkStridedBatched #define 
hipblasSsyr2kFortran hipblasSsyr2k #define hipblasDsyr2kFortran hipblasDsyr2k #define hipblasCsyr2kFortran hipblasCsyr2k #define hipblasZsyr2kFortran hipblasZsyr2k #define hipblasSsyr2kBatchedFortran hipblasSsyr2kBatched #define hipblasDsyr2kBatchedFortran hipblasDsyr2kBatched #define hipblasCsyr2kBatchedFortran hipblasCsyr2kBatched #define hipblasZsyr2kBatchedFortran hipblasZsyr2kBatched #define hipblasSsyr2kStridedBatchedFortran hipblasSsyr2kStridedBatched #define hipblasDsyr2kStridedBatchedFortran hipblasDsyr2kStridedBatched #define hipblasCsyr2kStridedBatchedFortran hipblasCsyr2kStridedBatched #define hipblasZsyr2kStridedBatchedFortran hipblasZsyr2kStridedBatched #define hipblasSsyrkxFortran hipblasSsyrkx #define hipblasDsyrkxFortran hipblasDsyrkx #define hipblasCsyrkxFortran hipblasCsyrkx #define hipblasZsyrkxFortran hipblasZsyrkx #define hipblasSsyrkxBatchedFortran hipblasSsyrkxBatched #define hipblasDsyrkxBatchedFortran hipblasDsyrkxBatched #define hipblasCsyrkxBatchedFortran hipblasCsyrkxBatched #define hipblasZsyrkxBatchedFortran hipblasZsyrkxBatched #define hipblasSsyrkxStridedBatchedFortran hipblasSsyrkxStridedBatched #define hipblasDsyrkxStridedBatchedFortran hipblasDsyrkxStridedBatched #define hipblasCsyrkxStridedBatchedFortran hipblasCsyrkxStridedBatched #define hipblasZsyrkxStridedBatchedFortran hipblasZsyrkxStridedBatched #define hipblasSgeamFortran hipblasSgeam #define hipblasDgeamFortran hipblasDgeam #define hipblasCgeamFortran hipblasCgeam #define hipblasZgeamFortran hipblasZgeam #define hipblasSgeamBatchedFortran hipblasSgeamBatched #define hipblasDgeamBatchedFortran hipblasDgeamBatched #define hipblasCgeamBatchedFortran hipblasCgeamBatched #define hipblasZgeamBatchedFortran hipblasZgeamBatched #define hipblasSgeamStridedBatchedFortran hipblasSgeamStridedBatched #define hipblasDgeamStridedBatchedFortran hipblasDgeamStridedBatched #define hipblasCgeamStridedBatchedFortran hipblasCgeamStridedBatched #define hipblasZgeamStridedBatchedFortran hipblasZgeamStridedBatched #define hipblasChemmFortran hipblasChemm #define hipblasZhemmFortran hipblasZhemm #define hipblasChemmBatchedFortran hipblasChemmBatched #define hipblasZhemmBatchedFortran hipblasZhemmBatched #define hipblasChemmStridedBatchedFortran hipblasChemmStridedBatched #define hipblasZhemmStridedBatchedFortran hipblasZhemmStridedBatched #define hipblasStrmmFortran hipblasStrmm #define hipblasDtrmmFortran hipblasDtrmm #define hipblasCtrmmFortran hipblasCtrmm #define hipblasZtrmmFortran hipblasZtrmm #define hipblasStrmmBatchedFortran hipblasStrmmBatched #define hipblasDtrmmBatchedFortran hipblasDtrmmBatched #define hipblasCtrmmBatchedFortran hipblasCtrmmBatched #define hipblasZtrmmBatchedFortran hipblasZtrmmBatched #define hipblasStrmmStridedBatchedFortran hipblasStrmmStridedBatched #define hipblasDtrmmStridedBatchedFortran hipblasDtrmmStridedBatched #define hipblasCtrmmStridedBatchedFortran hipblasCtrmmStridedBatched #define hipblasZtrmmStridedBatchedFortran hipblasZtrmmStridedBatched #define hipblasStrtriFortran hipblasStrtri #define hipblasDtrtriFortran hipblasDtrtri #define hipblasCtrtriFortran hipblasCtrtri #define hipblasZtrtriFortran hipblasZtrtri #define hipblasStrtriBatchedFortran hipblasStrtriBatched #define hipblasDtrtriBatchedFortran hipblasDtrtriBatched #define hipblasCtrtriBatchedFortran hipblasCtrtriBatched #define hipblasZtrtriBatchedFortran hipblasZtrtriBatched #define hipblasStrtriStridedBatchedFortran hipblasStrtriStridedBatched #define hipblasDtrtriStridedBatchedFortran hipblasDtrtriStridedBatched 
#define hipblasCtrtriStridedBatchedFortran hipblasCtrtriStridedBatched #define hipblasZtrtriStridedBatchedFortran hipblasZtrtriStridedBatched #define hipblasSdgmmFortran hipblasSdgmm #define hipblasDdgmmFortran hipblasDdgmm #define hipblasCdgmmFortran hipblasCdgmm #define hipblasZdgmmFortran hipblasZdgmm #define hipblasSdgmmBatchedFortran hipblasSdgmmBatched #define hipblasDdgmmBatchedFortran hipblasDdgmmBatched #define hipblasCdgmmBatchedFortran hipblasCdgmmBatched #define hipblasZdgmmBatchedFortran hipblasZdgmmBatched #define hipblasSdgmmStridedBatchedFortran hipblasSdgmmStridedBatched #define hipblasDdgmmStridedBatchedFortran hipblasDdgmmStridedBatched #define hipblasCdgmmStridedBatchedFortran hipblasCdgmmStridedBatched #define hipblasZdgmmStridedBatchedFortran hipblasZdgmmStridedBatched #define hipblasStrsmFortran hipblasStrsm #define hipblasDtrsmFortran hipblasDtrsm #define hipblasCtrsmFortran hipblasCtrsm #define hipblasZtrsmFortran hipblasZtrsm #define hipblasStrsmBatchedFortran hipblasStrsmBatched #define hipblasDtrsmBatchedFortran hipblasDtrsmBatched #define hipblasCtrsmBatchedFortran hipblasCtrsmBatched #define hipblasZtrsmBatchedFortran hipblasZtrsmBatched #define hipblasStrsmStridedBatchedFortran hipblasStrsmStridedBatched #define hipblasDtrsmStridedBatchedFortran hipblasDtrsmStridedBatched #define hipblasCtrsmStridedBatchedFortran hipblasCtrsmStridedBatched #define hipblasZtrsmStridedBatchedFortran hipblasZtrsmStridedBatched #define hipblasHgemmFortran hipblasHgemm #define hipblasSgemmFortran hipblasSgemm #define hipblasDgemmFortran hipblasDgemm #define hipblasCgemmFortran hipblasCgemm #define hipblasZgemmFortran hipblasZgemm #define hipblasHgemmBatchedFortran hipblasHgemmBatched #define hipblasSgemmBatchedFortran hipblasSgemmBatched #define hipblasDgemmBatchedFortran hipblasDgemmBatched #define hipblasCgemmBatchedFortran hipblasCgemmBatched #define hipblasZgemmBatchedFortran hipblasZgemmBatched #define hipblasHgemmStridedBatchedFortran hipblasHgemmStridedBatched #define hipblasSgemmStridedBatchedFortran hipblasSgemmStridedBatched #define hipblasDgemmStridedBatchedFortran hipblasDgemmStridedBatched #define hipblasCgemmStridedBatchedFortran hipblasCgemmStridedBatched #define hipblasZgemmStridedBatchedFortran hipblasZgemmStridedBatched #define hipblasGemmExFortran hipblasGemmEx #define hipblasGemmBatchedExFortran hipblasGemmBatchedEx #define hipblasGemmStridedBatchedExFortran hipblasGemmStridedBatchedEx #define hipblasTrsmExFortran hipblasTrsmEx #define hipblasTrsmBatchedExFortran hipblasTrsmBatchedEx #define hipblasTrsmStridedBatchedExFortran hipblasTrsmStridedBatchedEx #define hipblasAxpyExFortran hipblasAxpyEx #define hipblasAxpyBatchedExFortran hipblasAxpyBatchedEx #define hipblasAxpyStridedBatchedExFortran hipblasAxpyStridedBatchedEx #define hipblasDotExFortran hipblasDotEx #define hipblasDotBatchedExFortran hipblasDotBatchedEx #define hipblasDotStridedBatchedExFortran hipblasDotStridedBatchedEx #define hipblasDotcExFortran hipblasDotcEx #define hipblasDotcBatchedExFortran hipblasDotcBatchedEx #define hipblasDotcStridedBatchedExFortran hipblasDotcStridedBatchedEx #define hipblasNrm2ExFortran hipblasNrm2Ex #define hipblasNrm2BatchedExFortran hipblasNrm2BatchedEx #define hipblasNrm2StridedBatchedExFortran hipblasNrm2StridedBatchedEx #define hipblasRotExFortran hipblasRotEx #define hipblasRotBatchedExFortran hipblasRotBatchedEx #define hipblasRotStridedBatchedExFortran hipblasRotStridedBatchedEx #define hipblasScalExFortran hipblasScalEx #define hipblasScalBatchedExFortran 
hipblasScalBatchedEx #define hipblasScalStridedBatchedExFortran hipblasScalStridedBatchedEx /* ========== * Solver * ========== */ #define hipblasSgetrfFortran hipblasSgetrf #define hipblasDgetrfFortran hipblasDgetrf #define hipblasCgetrfFortran hipblasCgetrf #define hipblasZgetrfFortran hipblasZgetrf #define hipblasSgetrfBatchedFortran hipblasSgetrfBatched #define hipblasDgetrfBatchedFortran hipblasDgetrfBatched #define hipblasCgetrfBatchedFortran hipblasCgetrfBatched #define hipblasZgetrfBatchedFortran hipblasZgetrfBatched #define hipblasSgetrfStridedBatchedFortran hipblasSgetrfStridedBatched #define hipblasDgetrfStridedBatchedFortran hipblasDgetrfStridedBatched #define hipblasCgetrfStridedBatchedFortran hipblasCgetrfStridedBatched #define hipblasZgetrfStridedBatchedFortran hipblasZgetrfStridedBatched #define hipblasSgetrsFortran hipblasSgetrs #define hipblasDgetrsFortran hipblasDgetrs #define hipblasCgetrsFortran hipblasCgetrs #define hipblasZgetrsFortran hipblasZgetrs #define hipblasSgetrsBatchedFortran hipblasSgetrsBatched #define hipblasDgetrsBatchedFortran hipblasDgetrsBatched #define hipblasCgetrsBatchedFortran hipblasCgetrsBatched #define hipblasZgetrsBatchedFortran hipblasZgetrsBatched #define hipblasSgetrsStridedBatchedFortran hipblasSgetrsStridedBatched #define hipblasDgetrsStridedBatchedFortran hipblasDgetrsStridedBatched #define hipblasCgetrsStridedBatchedFortran hipblasCgetrsStridedBatched #define hipblasZgetrsStridedBatchedFortran hipblasZgetrsStridedBatched #define hipblasSgetriFortran hipblasSgetri #define hipblasDgetriFortran hipblasDgetri #define hipblasCgetriFortran hipblasCgetri #define hipblasZgetriFortran hipblasZgetri #define hipblasSgetriBatchedFortran hipblasSgetriBatched #define hipblasDgetriBatchedFortran hipblasDgetriBatched #define hipblasCgetriBatchedFortran hipblasCgetriBatched #define hipblasZgetriBatchedFortran hipblasZgetriBatched #define hipblasSgetriStridedBatchedFortran hipblasSgetriStridedBatched #define hipblasDgetriStridedBatchedFortran hipblasDgetriStridedBatched #define hipblasCgetriStridedBatchedFortran hipblasCgetriStridedBatched #define hipblasZgetriStridedBatchedFortran hipblasZgetriStridedBatched #define hipblasSgeqrfFortran hipblasSgeqrf #define hipblasDgeqrfFortran hipblasDgeqrf #define hipblasCgeqrfFortran hipblasCgeqrf #define hipblasZgeqrfFortran hipblasZgeqrf #define hipblasSgelsFortran hipblasSgels #define hipblasDgelsFortran hipblasDgels #define hipblasCgelsFortran hipblasCgels #define hipblasZgelsFortran hipblasZgels #define hipblasSgelsBatchedFortran hipblasSgelsBatched #define hipblasDgelsBatchedFortran hipblasDgelsBatched #define hipblasCgelsBatchedFortran hipblasCgelsBatched #define hipblasZgelsBatchedFortran hipblasZgelsBatched #define hipblasSgelsStridedBatchedFortran hipblasSgelsStridedBatched #define hipblasDgelsStridedBatchedFortran hipblasDgelsStridedBatched #define hipblasCgelsStridedBatchedFortran hipblasCgelsStridedBatched #define hipblasZgelsStridedBatchedFortran hipblasZgelsStridedBatched #define hipblasSgeqrfBatchedFortran hipblasSgeqrfBatched #define hipblasDgeqrfBatchedFortran hipblasDgeqrfBatched #define hipblasCgeqrfBatchedFortran hipblasCgeqrfBatched #define hipblasZgeqrfBatchedFortran hipblasZgeqrfBatched #define hipblasSgeqrfStridedBatchedFortran hipblasSgeqrfStridedBatched #define hipblasDgeqrfStridedBatchedFortran hipblasDgeqrfStridedBatched #define hipblasCgeqrfStridedBatchedFortran hipblasCgeqrfStridedBatched #define hipblasZgeqrfStridedBatchedFortran hipblasZgeqrfStridedBatched #endif 
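Editorial sketch (not part of the original sources): the header above is only pulled in when the clients are built without the Fortran bindings, so every *Fortran test entry point is simply a #define alias for the corresponding C API routine. The minimal example below illustrates that mapping for hipblasSscalFortran; it assumes the standard <hip/hip_runtime.h> and <hipblas.h> headers are available, that this header is on the include path, and it omits error checking for brevity.

#include <hip/hip_runtime.h>
#include <hipblas.h>

#include "hipblas_no_fortran.hpp"

int main()
{
    hipblasHandle_t handle;
    hipblasCreate(&handle);

    const int n     = 4;
    float     alpha = 2.0f;
    float     hx[]  = {1.0f, 2.0f, 3.0f, 4.0f};

    // Stage a small vector on the device.
    float* dx = nullptr;
    hipMalloc((void**)&dx, n * sizeof(float));
    hipMemcpy(dx, hx, n * sizeof(float), hipMemcpyHostToDevice);

    // With the Fortran clients disabled, this macro expands to plain hipblasSscal,
    // so x is scaled by alpha exactly as the C API call would do.
    hipblasSscalFortran(handle, n, &alpha, dx, 1);

    // Copy the scaled vector back and clean up.
    hipMemcpy(hx, dx, n * sizeof(float), hipMemcpyDeviceToHost);
    hipFree(dx);
    hipblasDestroy(handle);
    return 0;
}

When the Fortran clients are enabled instead, the same *Fortran names resolve to bind(c) wrappers like the gels ones defined in the hipblas_interface module earlier in this archive.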
hipBLAS-rocm-5.5.1/clients/include/hipblas_parse_data.hpp000066400000000000000000000030001434647641600233260ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef _HIPBLAS_PARSE_DATA_H #define _HIPBLAS_PARSE_DATA_H #include <string> // Parse --data and --yaml command-line arguments bool hipblas_parse_data(int& argc, char** argv, const std::string& default_file = ""); #endif hipBLAS-rocm-5.5.1/clients/include/hipblas_smoke.yaml000066400000000000000000000613501434647641600225160ustar00rootroot00000000000000--- include: hipblas_common.yaml #include: known_bugs.yaml # This is a minimal smoke test of hipBLAS functionality # It will run at least a single small sized function test for all of the L1, L2, and L3 functions.
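# Editorial note (not part of the original file): the test client's data loader
# accepts --data/--yaml arguments (see hipblas_parse_data.hpp above), so this
# smoke suite can be selected at run time with a command along the lines of
#     hipblas-test --yaml hipblas_smoke.yaml
# where the binary name and the file location are assumptions about the local
# client build rather than something this file defines.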
# Algorithm variations controlled by enums will be tested but those that are selected by size will not be # exhaustive as very small sizes are only used Definitions: ############ # Level 1+ # ############ - &incx_range - [1] - &N_range - [513] - &batch_count_range - [2] - &incx_incy_range - { incx: 1, incy: 1 } - &incx_incy_range_x_y_output - { incx: 1, incy: 1 } - &incx_incy_range_y_output - { incx: 1, incy: 1 } - &alpha_range - [0.5] - &alpha_beta_range - { alpha: 0.5, beta: -1.0 } - &alphai_betai_range - { alphai: 0.5 } - &complex_alpha_range - { alpha: 1.5, alphai: -1.0 } - &complex_alpha_beta_range - { alpha: -0.5, beta: -0.5, alphai: 0.5, betai: 0.5 } ########### # Level 2 # ########### - &gbmv_dims - { M: 20, N: 30, lda: 50, KL: 14, KU: 14 } - &gbmv_common_args precision: *single_double_precisions_complex_real transA: [ N, T ] incx_incy: *incx_incy_range alpha_beta: *alpha_beta_range matrix_size: *gbmv_dims - &gemv_dims - { M: 65, N: 65, lda: 65, stride_a: 1 } - &gemv_common_args precision: *single_double_precisions_complex_real transA: [ N, T ] incx_incy: *incx_incy_range alpha_beta: *alpha_beta_range matrix_size: *gemv_dims - &hbmv_dims - { N: 65, K: 64, lda: 65 } - &hbmv_common_args precision: *single_double_precisions_complex uplo: [ U, L ] incx_incy: *incx_incy_range alpha_beta: *complex_alpha_beta_range matrix_size: *hbmv_dims - &hemv_dims - { N: 65, lda: 65 } - &hemv_common_args precision: *single_double_precisions_complex uplo: [ U, L ] incx_incy: *incx_incy_range alpha_beta: *complex_alpha_beta_range matrix_size: *hemv_dims - &her_dims - { N: 65, lda: 65 } - &her_common_args precision: *single_double_precisions_complex uplo: [ U, L ] alpha_beta: *complex_alpha_beta_range incx: *incx_range matrix_size: *her_dims - &her2_dims - { N: 65, lda: 65 } - &her2_common_args precision: *single_double_precisions_complex uplo: [ U, L ] alpha_beta: *complex_alpha_beta_range incx_incy: *incx_incy_range matrix_size: *her2_dims - &hpmv_dims - { N: 65 } - &hpmv_common_args precision: *single_double_precisions_complex uplo: [ U, L ] incx_incy: *incx_incy_range alpha_beta: *complex_alpha_beta_range matrix_size: *hpmv_dims - &hpr_dims - { N: 65 } - &hpr_common_args precision: *single_double_precisions_complex uplo: [ U, L ] alpha_beta: *complex_alpha_range incx: *incx_range matrix_size: *hpr_dims - &hpr2_dims - { N: 65 } - &hpr2_common_args precision: *single_double_precisions_complex uplo: [ U, L ] alpha_beta: *complex_alpha_range incx_incy: *incx_incy_range matrix_size: *hpr2_dims - &ger_dims - { M: 65, N: 65, lda: 65, stride_a: 4225} - &ger_common_args precision: *single_double_precisions incx_incy: *incx_incy_range alpha: *alpha_range matrix_size: *ger_dims - &geruc_dims - { M: 65, N: 65, lda: 65, stride_a: 4225} - &geruc_common_args precision: *single_double_precisions_complex incx_incy: *incx_incy_range alpha: *alpha_range matrix_size: *geruc_dims - &spr_dims - { N: 65 } - &spr_common_args precision: *single_double_precisions_complex_real uplo: [ U, L ] alpha_beta: *complex_alpha_range incx: *incx_range matrix_size: *spr_dims - &spr2_dims - { N: 65 } - &spr2_common_args precision: *single_double_precisions uplo: [ U, L ] alpha: *alpha_range incx_incy: *incx_incy_range matrix_size: *spr2_dims - &syr_dims - { N: 65, lda: 65 } - &syr_common_args precision: *single_double_precisions_complex_real uplo: [ U, L ] alpha_beta: *complex_alpha_range incx: *incx_range matrix_size: *syr_dims - &syr2_dims - { N: 65, lda: 65 } - &syr2_common_args precision: *single_double_precisions_complex uplo: [ U, L 
] alpha_beta: *complex_alpha_range incx_incy: *incx_incy_range matrix_size: *syr2_dims - &symv_dims - { N: 65, lda: 65 } - &symv_common_args precision: *single_double_precisions_complex_real uplo: [ U, L ] incx_incy: *incx_incy_range alpha_beta: *complex_alpha_beta_range matrix_size: *symv_dims - &sbmv_dims - { N: 65, K: 11, lda: 65 } - &sbmv_common_args precision: *single_double_precisions uplo: [ U, L ] incx_incy: *incx_incy_range alpha_beta: *alpha_beta_range matrix_size: *sbmv_dims - &spmv_dims - { N: 65, K: 11, lda: 65 } - &spmv_common_args precision: *single_double_precisions uplo: [ U, L ] incx_incy: *incx_incy_range alpha_beta: *alpha_beta_range matrix_size: *spmv_dims - &tbmv_dims - { M: 257, K: 11, lda: 257 } - &tbmv_common_args precision: *single_double_precisions_complex_real uplo: [U, L] transA: [N, T] diag: [N, U] incx: *incx_range matrix_size: *tbmv_dims - &tpmv_dims - { M: 290, stride_a: 1 } - &tpmv_common_args precision: *single_double_precisions_complex_real uplo: [U, L] transA: [N, T] diag: [N, U] incx: *incx_range matrix_size: *tpmv_dims - &trmv_dims - { M: 257, lda: 257, stride_a: 1 } - &trmv_common_args precision: *single_double_precisions_complex_real uplo: [U, L] transA: [N, T, C] diag: [N, U] incx: *incx_range matrix_size: *trmv_dims - &tbsv_dims - { N: 65, K: 5, lda: 65 } - &tbsv_common_args precision: *single_double_precisions_complex_real uplo: [U, L] transA: [N, T] diag: [N, U] incx: *incx_range matrix_size: *tbsv_dims - &tpsv_dims - { N: 65 } - &tpsv_common_args precision: *single_double_precisions_complex_real uplo: [U, L] transA: [N, T] diag: [N, U] incx: *incx_range matrix_size: *tpsv_dims - &trsv_dims - { M: 33, lda: 33 } - &trsv_common_args precision: *single_double_precisions_complex_real uplo: [U, L] transA: [N, T] diag: [N, U] incx: *incx_range matrix_size: *trsv_dims ########### # Level 3 # ########### - &transA_transB_range - { transA: [N,T,C], transB: [N,T,C] } - &dgmm_dims - { M: 33, N: 33, lda: 35, incx: 2, ldc: 35 } - &dgmm_common_args precision: *single_double_precisions_complex_real side: [L, R] matrix_size: *dgmm_dims - &geam_dims - { M: 3, N: 33, lda: 35, ldb: 35, ldc: 35 } - &geam_common_args precision: *single_double_precisions_complex_real transA_transB: *transA_transB_range alpha_beta: *complex_alpha_beta_range matrix_size: *geam_dims - &hemm_dims - { M: 12, N: 33, lda: 33, ldb: 33, ldc: 33 } - &hemm_common_args precision: *single_double_precisions_complex uplo: [ U, L ] side: [L, R] transA: [ N, C ] alpha_beta: *alpha_beta_range matrix_size: *hemm_dims - &herk_dims - { N: 65, lda: 65, K: 33, ldc: 65 } - &herk_common_args precision: *single_double_precisions_complex uplo: [ U, L ] transA: [ N, C ] alpha_beta: *complex_alpha_beta_range matrix_size: *herk_dims - &her2k_dims - { N: 12, K: 9, lda: 12, ldb: 12, ldc: 12 } - &her2k_common_args precision: *single_double_precisions_complex uplo: [ U, L ] transA: [ N, C ] alpha_beta: *complex_alpha_beta_range matrix_size: *her2k_dims - &herkx_dims - { N: 12, K: 9, lda: 12, ldb: 12, ldc: 12 } - &herkx_common_args precision: *single_double_precisions_complex uplo: [ U, L ] transA: [ N, C ] alpha_beta: *complex_alpha_beta_range matrix_size: *herkx_dims - &symm_dims - { M: 12, N: 33, lda: 33, ldb: 33, ldc: 33 } - &symm_common_args precision: *single_double_precisions_complex_real side: [ L, R ] uplo: [ U, L ] alpha_beta: *complex_alpha_beta_range matrix_size: *symm_dims - &syrk_dims - { N: 12, K: 9, lda: 12, ldb: 12, ldc: 12 } - &syrk_common_args precision: *single_double_precisions_complex_real uplo: 
[ U, L ] transA: [ N, T ] alpha_beta: *complex_alpha_beta_range matrix_size: *syrk_dims - &syr2k_dims - { N: 12, K: 9, lda: 12, ldb: 12, ldc: 12 } - &syr2k_common_args precision: *single_double_precisions_complex_real uplo: [ U, L ] transA: [ N, T ] matrix_size: *syr2k_dims - &syrkx_dims - { N: 12, K: 9, lda: 12, ldb: 12, ldc: 12 } - &syrkx_common_args precision: *single_double_precisions_complex_real uplo: [ U, L ] transA: [ N, T ] alpha_beta: *complex_alpha_beta_range matrix_size: *syrkx_dims - &gemm_dims - { M: 65, N: 33, K: 33, lda: 128, ldb: 128, ldc: 128 } - &gemm_common_args precision: *single_double_precisions_complex_real transA_transB: *transA_transB_range alpha_beta: *complex_alpha_beta_range matrix_size: *gemm_dims - &gemm_ex_dims - { M: 65, N: 33, K: 33, lda: 128, ldb: 128, ldc: 128, ld: 128 } - &gemm_ex_common_args precision: *hpa_half_precision transA_transB: *transA_transB_range alpha_beta: *complex_alpha_beta_range matrix_size: *gemm_ex_dims Tests: ########### # Level 1 # ########### # All functions with no parameters to scan over - name: blas1 category: quick function: - rotg: *rotg_precisions - rotmg: *single_double_precisions #*single_double_precisions_complex_real - name: blas1_batched category: quick batch_count: *batch_count_range function: - rotg_batched: *rotg_precisions - rotmg_batched: *single_double_precisions #*single_double_precisions_complex_real - name: blas1_strided_batched category: quick batch_count: *batch_count_range stride_scale: [ 1.0 ] function: - rotg_strided_batched: *rotg_precisions - rotmg_strided_batched: *single_double_precisions #*single_double_precisions_complex_real # All functions with alpha and incx and incy - name: blas1_with_alpha category: quick N: *N_range incx_incy: *incx_incy_range_y_output alpha_beta: *alpha_beta_range alphai_betai: *alphai_betai_range function: #- axpy: *half_single_precisions_complex_real - axpy: *single_double_precisions_complex_real - axpy_ex: *half_single_double_complex_real_precisions - name: axpy_ex_compute_check category: quick N: *N_range incx_incy: *incx_incy_range alpha_beta: *alpha_beta_range function: - axpy_ex: *hpa_half_precision - name: blas1_batched_with_alpha category: quick N: *N_range incx_incy: *incx_incy_range_y_output alpha_beta: *alpha_beta_range alphai_betai: *alphai_betai_range batch_count: *batch_count_range function: #- axpy_batched: *half_single_precisions_complex_real - axpy_batched: *single_double_precisions_complex_real - axpy_batched_ex: *half_single_double_complex_real_precisions - name: blas1_strided_batched_with_alpha category: quick N: *N_range incx_incy: *incx_incy_range_y_output batch_count: *batch_count_range stride_scale: [ 1 ] alpha_beta: *alpha_beta_range alphai_betai: *alphai_betai_range function: #- axpy_strided_batched: *half_single_precisions_complex_real - axpy_strided_batched: *single_double_precisions_complex_real - axpy_strided_batched_ex: *half_single_double_complex_real_precisions # All functions with alpha and incx but no incy - name: blas1_with_alpha category: quick N: *N_range incx: *incx_range alpha_beta: *alpha_beta_range alphai_betai: *alphai_betai_range function: - scal: *single_double_precisions_complex_real #- scal: *scal_ex_single_double_complex_real_alpha_complex_out - scal_ex: *single_double_precisions_complex_real #- scal_ex: *scal_ex_single_double_complex_real_alpha_complex_out - scal_ex: *hpa_half_half_precisions - name: blas1_batched category: quick N: *N_range incx: *incx_range alpha_beta: *alpha_beta_range alphai_betai: *alphai_betai_range 
batch_count: *batch_count_range function: - scal_batched: *single_double_precisions_complex_real #- scal_batched: *scal_ex_single_double_complex_real_alpha_complex_out - scal_batched_ex: *single_double_precisions_complex_real #- scal_batched_ex: *scal_ex_single_double_complex_real_alpha_complex_out - scal_batched_ex: *hpa_half_half_precisions - name: blas1_strided_batched category: quick N: *N_range incx: *incx_range alpha_beta: *alpha_beta_range alphai_betai: *alphai_betai_range batch_count: *batch_count_range stride_scale: [ 1 ] function: - scal_strided_batched: *single_double_precisions_complex_real #- scal_strided_batched: *scal_ex_single_double_complex_real_alpha_complex_out - scal_strided_batched_ex: *single_double_precisions_complex_real #- scal_strided_batched_ex: *scal_ex_single_double_complex_real_alpha_complex_out - scal_strided_batched_ex: *hpa_half_half_precisions # All functions with incx, no incy, no alpha # nrm2,asum,iamax,iamin all variants - name: blas1 category: quick N: *N_range incx: *incx_range function: - nrm2: *single_double_precisions_complex_real - asum: *single_double_precisions_complex_real - iamax: *single_double_precisions_complex_real - iamin: *single_double_precisions_complex_real - name: blas1_batched category: quick N: *N_range incx: *incx_range batch_count: *batch_count_range function: - asum_batched: *single_double_precisions_complex_real - nrm2_batched: *single_double_precisions_complex_real - iamax_batched: *single_double_precisions_complex_real - iamin_batched: *single_double_precisions_complex_real - name: blas1_strided_batched category: quick N: *N_range incx: *incx_range batch_count: *batch_count_range stride_scale: [ 1.0 ] function: - asum_strided_batched: *single_double_precisions_complex_real - nrm2_strided_batched: *single_double_precisions_complex_real - iamax_strided_batched: *single_double_precisions_complex_real - iamin_strided_batched: *single_double_precisions_complex_real # All functions with incx, incy, no alpha - name: blas1 category: quick N: *N_range incx_incy: *incx_incy_range function: - copy: *single_double_precisions_complex_real #- dot: *half_bfloat_single_double_complex_real_precisions - dot: *single_double_precisions_complex_real - dotc: *single_double_precisions_complex - swap: *single_double_precisions_complex_real - rot: *rot_precisions - rotm: *single_double_precisions #*single_double_precisions_complex_real - name: blas1_batched category: quick N: *N_range incx_incy: *incx_incy_range batch_count: *batch_count_range function: - copy_batched: *single_double_precisions_complex_real #- dot_batched: *half_bfloat_single_double_complex_real_precisions - dot_batched: *single_double_precisions_complex_real - dotc_batched: *single_double_precisions_complex - swap_batched: *single_double_precisions_complex_real - rot_batched: *rot_precisions - rotm_batched: *single_double_precisions #*single_double_precisions_complex_real - name: blas1_strided_batched category: quick N: *N_range incx_incy: *incx_incy_range batch_count: *batch_count_range stride_scale: [1] function: - copy_strided_batched: *single_double_precisions_complex_real #- dot_strided_batched: *half_bfloat_single_double_complex_real_precisions - dot_strided_batched: *single_double_precisions_complex_real - dotc_strided_batched: *single_double_precisions_complex - swap_strided_batched: *single_double_precisions_complex_real - rot_strided_batched: *rot_precisions - rotm_strided_batched: *single_double_precisions #*single_double_precisions_complex_real # Swap, rot, and rotm get 
their own for quick tests as they write to x and y - name: blas1 category: quick N: *N_range incx_incy: *incx_incy_range_x_y_output batch_count: *batch_count_range stride_scale: [ 1 ] function: - swap: *single_double_precisions_complex_real - rot: *rot_precisions - rotm: *single_double_precisions #*single_double_precisions_complex_real - name: blas1 category: quick N: *N_range incx_incy: *incx_incy_range_x_y_output batch_count: *batch_count_range stride_scale: [ 1 ] function: - swap_batched: *single_double_precisions_complex_real - rot_batched: *rot_precisions - rotm_batched: *single_double_precisions #*single_double_precisions_complex_real - name: blas1 category: quick N: *N_range incx_incy: *incx_incy_range_x_y_output batch_count: *batch_count_range stride_scale: [ 1 ] function: - swap_strided_batched: *single_double_precisions_complex_real - rot_strided_batched: *rot_precisions - rotm_strided_batched: *single_double_precisions #*single_double_precisions_complex_real ########### # Level 2 # ########### # gbmv - name: gbmv category: quick function: - gbmv - gbmv_batched - gbmv_strided_batched arguments: *gbmv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: gemv category: quick function: - gemv - gemv_batched - gemv_strided_batched arguments: *gemv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: ger category: quick function: - ger - ger_batched - ger_strided_batched arguments: *ger_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: gerc category: quick function: - gerc - gerc_batched - gerc_strided_batched arguments: *geruc_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: geru category: quick function: - geru - geru_batched - geru_strided_batched arguments: *geruc_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: hbmv category: quick function: - hbmv - hbmv_batched - hbmv_strided_batched arguments: *hbmv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: hemv category: quick function: - hemv - hemv_batched - hemv_strided_batched arguments: *hemv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: her category: quick function: - her - her_batched - her_strided_batched arguments: *her_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: her2 category: quick function: - her2 - her2_batched - her2_strided_batched arguments: *her2_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: hpmv category: quick function: - hpmv - hpmv_batched - hpmv_strided_batched arguments: *hpmv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: hpr category: quick function: - hpr - hpr_batched - hpr_strided_batched arguments: *hpr_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: hpr2 category: quick function: - hpr2 - hpr2_batched - hpr2_strided_batched arguments: *hpr2_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: sbmv category: quick function: - sbmv - sbmv_batched - sbmv_strided_batched arguments: *sbmv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: spmv category: quick function: - spmv - spmv_batched - spmv_strided_batched arguments: *spmv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: spr category: quick function: - spr - spr_batched - spr_strided_batched arguments: *spr_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: spr2 category: quick function: - spr2 - spr2_batched - 
spr2_strided_batched arguments: *spr2_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: symv category: quick function: - symv - symv_batched - symv_strided_batched arguments: *symv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: syr category: quick function: - syr - syr_batched - syr_strided_batched arguments: *syr_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: syr2 category: quick function: - syr2 - syr2_batched - syr2_strided_batched arguments: *syr2_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: tbmv category: quick function: - tbmv - tbmv_batched - tbmv_strided_batched arguments: *tbmv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: tpmv category: quick function: - tpmv - tpmv_batched - tpmv_strided_batched arguments: *tpmv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: trmv category: quick function: - trmv - trmv_batched - trmv_strided_batched arguments: *trmv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: tbsv category: quick function: - tbsv - tbsv_batched - tbsv_strided_batched arguments: *tbsv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: tpsv category: quick function: - tpsv - tpsv_batched - tpsv_strided_batched arguments: *tpsv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: trsv category: quick function: - trsv - trsv_batched - trsv_strided_batched arguments: *trsv_common_args batch_count: *batch_count_range stride_scale: [ 1 ] ########### # Level 3 # ########### - name: dgmm category: quick function: - dgmm - dgmm_batched - dgmm_strided_batched arguments: *dgmm_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: geam category: quick function: - geam - geam_batched - geam_strided_batched arguments: *geam_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: hemm category: quick function: - hemm - hemm_batched - hemm_strided_batched arguments: *hemm_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: herk category: quick function: - herk - herk_batched - herk_strided_batched arguments: *herk_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: her2k category: quick function: - her2k - her2k_batched - her2k_strided_batched arguments: *her2k_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: herkx category: quick function: - herkx - herkx_batched - herkx_strided_batched arguments: *herkx_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: symm category: quick function: - symm - symm_batched - symm_strided_batched arguments: *symm_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: syrk category: quick function: - syrk - syrk_batched - syrk_strided_batched arguments: *syrk_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: syr2k category: quick function: - syr2k - syr2k_batched - syr2k_strided_batched arguments: *syr2k_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: syrkx category: quick function: - syrkx - syrkx_batched - syrkx_strided_batched arguments: *syrkx_common_args batch_count: *batch_count_range stride_scale: [ 1 ] # gemms - name: gemm category: quick function: - gemm - gemm_batched - gemm_strided_batched arguments: *gemm_common_args batch_count: *batch_count_range stride_scale: [ 1 ] - name: gemm_ex category: quick function: - gemm_ex - gemm_batched_ex - 
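The Level-3 groups above (dgmm through syrkx) and the gemm/gemm_ex groups that close this file are again expanded into calls against the hipBLAS API. As a point of reference, the single-precision non-batched gemm call has the classic BLAS shape sketched below; this is a hedged illustration where handle, the device pointers d_A/d_B/d_C and the leading dimensions are placeholders, not values read from this file.

// Sketch: C = alpha * A * B + beta * C, all matrices column-major on the device.
int   m = 64, n = 64, k = 64;
float alpha = 1.0f, beta = 0.0f;
// d_A is m x k (lda >= m), d_B is k x n (ldb >= k), d_C is m x n (ldc >= m).
CHECK_HIPBLAS_ERROR(hipblasSgemm(handle,
                                 HIPBLAS_OP_N, HIPBLAS_OP_N,
                                 m, n, k,
                                 &alpha, d_A, m,
                                 d_B, k,
                                 &beta, d_C, m));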
gemm_strided_batched_ex arguments: *gemm_ex_common_args batch_count: *batch_count_range stride_scale: [ 1 ] ... hipBLAS-rocm-5.5.1/clients/include/hipblas_template.yaml000066400000000000000000000003101434647641600232000ustar00rootroot00000000000000# Template used to process YAML from log files --- include: hipblas_common.yaml Functions: ###### # L1 # ###### # scal # hipblas_sscal: { function: scal, <<: *single_precision } Tests: hipBLAS-rocm-5.5.1/clients/include/hipblas_test.hpp000066400000000000000000000062311434647641600222010ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ************************************************************************ */ #pragma once #ifdef GOOGLE_TEST #include #endif #include "hipblas_arguments.hpp" #include #include #include #include #include #include #include #include #include #include #include #ifdef WIN32 typedef long long ssize_t; /* x64 only supported */ #endif // ---------------------------------------------------------------------------- // Normal tests which return true when converted to bool // ---------------------------------------------------------------------------- struct hipblas_test_valid { // Return true to indicate the type combination is valid, for filtering virtual explicit operator bool() final { return true; } // Require derived class to define functor which takes (const Arguments &) virtual void operator()(const Arguments&) = 0; virtual ~hipblas_test_valid() = default; }; // ---------------------------------------------------------------------------- // Error case which returns false when converted to bool. A void specialization // of the FILTER class template above, should be derived from this class, in // order to indicate that the type combination is invalid. 
// ---------------------------------------------------------------------------- struct hipblas_test_invalid { // Return false to indicate the type combination is invalid, for filtering virtual explicit operator bool() final { return false; } // If this specialization is actually called, print fatal error message virtual void operator()(const Arguments&) final { static constexpr char msg[] = "Internal error: Test called with invalid types"; #ifdef GOOGLE_TEST FAIL() << msg; #else std::cerr << msg << std::endl; abort(); #endif } virtual ~hipblas_test_invalid() = default; }; hipBLAS-rocm-5.5.1/clients/include/hipblas_unique_ptr.hpp000066400000000000000000000052101434647641600234110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef GUARD_HIPBLAS_MANAGE_PTR_HPP #define GUARD_HIPBLAS_MANAGE_PTR_HPP #include #include "hipblas.h" #define PRINT_IF_HIP_ERROR(INPUT_STATUS_FOR_CHECK) \ { \ hipError_t TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ if(TMP_STATUS_FOR_CHECK != hipSuccess) \ { \ fprintf(stderr, \ "hip error code: %d at %s:%d\n", \ TMP_STATUS_FOR_CHECK, \ __FILE__, \ __LINE__); \ } \ } namespace hipblas { // device_malloc wraps hipMalloc and provides same API as malloc static void* device_malloc(size_t byte_size) { void* pointer; PRINT_IF_HIP_ERROR(hipMalloc(&pointer, byte_size)); return pointer; } // device_free wraps hipFree and provides same API as free static void device_free(void* ptr) { PRINT_IF_HIP_ERROR(hipFree(ptr)); } } // namespace hipblas using hipblas_unique_ptr = std::unique_ptr; #endif hipBLAS-rocm-5.5.1/clients/include/hipblas_vector.hpp000066400000000000000000000421411434647641600225240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
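hipblas_unique_ptr.hpp above pairs device_malloc with device_free so a device buffer can be owned by a smart pointer. The template arguments of the hipblas_unique_ptr alias are not visible in this dump, so the sketch below spells out a std::unique_ptr with a function-pointer deleter under its own local names (device_buffer, make_device_buffer are assumptions of the example, not names from the repository).

#include <cstddef>
#include <memory>

// Sketch only: wrap a raw device allocation in a RAII owner.
using device_buffer = std::unique_ptr<void, void (*)(void*)>; // assumed shape of the alias

inline device_buffer make_device_buffer(size_t bytes)
{
    // hipblas::device_malloc prints on failure and returns nullptr;
    // hipblas::device_free is the matching deleter.
    return device_buffer(hipblas::device_malloc(bytes), hipblas::device_free);
}

// Usage: auto d_x = make_device_buffer(n * sizeof(float));
//        float* raw = static_cast<float*>(d_x.get());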
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef HIPBLAS_VECTOR_H_ #define HIPBLAS_VECTOR_H_ #include "d_vector.hpp" #include "device_batch_vector.hpp" #include "hipblas.h" #include "host_batch_vector.hpp" #include "utility.h" #include #include #include #include //! //! @brief enum to check for NaN initialization of the Input vector/matrix //! typedef enum hipblas_check_nan_init_ { // Alpha sets NaN hipblas_client_alpha_sets_nan, // Beta sets NaN hipblas_client_beta_sets_nan, // Never set NaN hipblas_client_never_set_nan } hipblas_check_nan_init; /* ============================================================================================ */ /*! \brief pseudo-vector subclass which uses device memory */ template class device_vector : private d_vector { public: // Must wrap constructor and destructor in functions to allow Google Test macros to work explicit device_vector(size_t s) : d_vector(s) { data = this->device_vector_setup(); } ~device_vector() { this->device_vector_teardown(data); } // Decay into pointer wherever pointer is expected operator T*() { return data; } operator const T*() const { return data; } // Tell whether malloc failed explicit operator bool() const { return data != nullptr; } // Disallow copying or assigning device_vector(const device_vector&) = delete; device_vector& operator=(const device_vector&) = delete; private: T* data; }; //! //! @brief Pseudo-vector subclass which uses host memory. //! template struct host_vector : std::vector { // Inherit constructors using std::vector::vector; //! //! @brief Constructor. //! host_vector(size_t n, ptrdiff_t inc) : std::vector(n * std::abs(inc)) , m_n(n) , m_inc(inc) { } //! //! @brief Copy constructor from host_vector of other types convertible to T //! template {}, int> = 0> host_vector(const host_vector& x) : std::vector(x.size()) , m_n(x.size()) , m_inc(1) { for(size_t i = 0; i < m_n; ++i) (*this)[i] = x[i]; } //! //! @brief Decay into pointer wherever pointer is expected //! operator T*() { return this->data(); } //! //! @brief Decay into constant pointer wherever constant pointer is expected //! operator const T*() const { return this->data(); } //! //! @brief Transfer from a device vector. //! @param that That device vector. //! @return the hip error. //! 
hipError_t transfer_from(const device_vector& that) { hipError_t hip_err; if(that.use_HMM && hipSuccess != (hip_err = hipDeviceSynchronize())) return hip_err; return hipMemcpy(*this, that, sizeof(T) * this->size(), that.use_HMM ? hipMemcpyHostToHost : hipMemcpyDeviceToHost); } //! //! @brief Returns the length of the vector. //! size_t n() const { return m_n; } //! //! @brief Returns the increment of the vector. //! ptrdiff_t inc() const { return m_inc; } //! //! @brief Returns the batch count (always 1). //! static constexpr int batch_count() { return 1; } //! //! @brief Returns the stride (out of context, always 0) //! static constexpr hipblasStride stride() { return 0; } //! //! @brief Check if memory exists (out of context, always hipSuccess) //! static constexpr hipError_t memcheck() { return hipSuccess; } private: size_t m_n = 0; ptrdiff_t m_inc = 0; }; //! //! @brief Template for initializing a host (non_batched|batched|strided_batched)vector. //! @param that That vector. //! @param rand_gen The random number generator //! @param seedReset Reset the seed if true, do not reset the seed otherwise. //! template void hipblas_init_template(U& that, T rand_gen(), bool seedReset, bool alternating_sign = false) { if(seedReset) hipblas_seedrand(); for(int batch_index = 0; batch_index < that.batch_count(); ++batch_index) { auto* batched_data = that[batch_index]; ptrdiff_t inc = that.inc(); auto n = that.n(); if(inc < 0) batched_data -= (n - 1) * inc; if(alternating_sign) { for(int i = 0; i < n; i++) { auto value = rand_gen(); batched_data[i * inc] = (i ^ 0) & 1 ? value : hipblas_negate(value); } } else { for(int i = 0; i < n; ++i) batched_data[i * inc] = rand_gen(); } } } //! //! @brief Initialize a host_batch_vector with NaNs. //! @param that The host_batch_vector to be initialized. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template inline void hipblas_init_nan(host_batch_vector& that, bool seedReset = false) { hipblas_init_template(that, random_nan_generator, seedReset); } // //! // //! @brief Initialize a host_vector with NaNs. // //! @param that The host_vector to be initialized. // //! @param seedReset reset he seed if true, do not reset the seed otherwise. // //! // template // inline void hipblas_init_nan(host_vector& that, bool seedReset = false) // { // hipblas_init_template(that, random_nan_generator, seedReset); // } template inline void hipblas_init_nan( host_vector& A, size_t M, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) A[i + j * lda + i_batch * stride] = T(hipblas_nan_rng()); } //! //! @brief Initialize a host_batch_vector. //! @param that The host_batch_vector. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template inline void hipblas_init_hpl(host_batch_vector& that, bool seedReset = false, bool alternating_sign = false) { hipblas_init_template(that, random_hpl_generator, seedReset, alternating_sign); } //! //! @brief Initialize a host_batch_vector. //! @param that The host_batch_vector. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template inline void hipblas_init(host_batch_vector& that, bool seedReset = false, bool alternating_sign = false) { hipblas_init_template(that, random_generator, seedReset, alternating_sign); } //! //! @brief Initialize a host_vector. //! @param that The host_vector. //! 
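The host_vector / device_vector pair above is the basic workhorse of the clients: both decay to raw pointers, device_vector reports allocation failure through its bool conversion, and host_vector::transfer_from pulls results back from the device. A short usage sketch, assuming a float payload and the CHECK_* macros from the client utilities:

// Illustrative only.
void example_roundtrip()
{
    size_t N = 256;
    host_vector<float>   hx(N, 1); // n = N, inc = 1
    device_vector<float> dx(N);    // wraps hipMalloc
    if(!dx)
        return;                    // device allocation failed

    hipblas_init(hx);              // random init helper from this header
    CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(float) * N, hipMemcpyHostToDevice));

    // ... run hipBLAS work on dx ...

    CHECK_HIP_ERROR(hx.transfer_from(dx)); // device -> host copy
}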
@param seedReset reset the seed if true, do not reset the seed otherwise. //! template inline void hipblas_init(host_vector& that, bool seedReset = false) { if(seedReset) hipblas_seedrand(); hipblas_init(that, that.size(), 1, 1); } //! //! @brief trig Initialize of a host_batch_vector. //! @param that The host_batch_vector. //! @param init_cos cos initialize if true, else sin initialize. //! template inline void hipblas_init_trig(host_batch_vector& that, bool init_cos = false) { if(init_cos) { for(int batch_index = 0; batch_index < that.batch_count(); ++batch_index) { auto* batched_data = that[batch_index]; ptrdiff_t inc = that.inc(); auto n = that.n(); if(inc < 0) batched_data -= (n - 1) * inc; hipblas_init_cos(batched_data, 1, n, inc, 0, 1); } } else { for(int batch_index = 0; batch_index < that.batch_count(); ++batch_index) { auto* batched_data = that[batch_index]; ptrdiff_t inc = that.inc(); auto n = that.n(); if(inc < 0) batched_data -= (n - 1) * inc; hipblas_init_sin(batched_data, 1, n, inc, 0, 1); } } } //! //! @brief Initialize a host_vector. //! @param hx The host_vector. //! @param arg Specifies the argument class. //! @param N Length of the host vector. //! @param incx Increment for the host vector. //! @param stride_x Incement between the host vector. //! @param batch_count number of instances in the batch. //! @param nan_init Initialize vector with Nan's depending upon the hipblas_check_nan_init enum value. //! @param seedReset reset the seed if true, do not reset the seed otherwise. Use init_cos if seedReset is true else use init_sin. //! @param alternating_sign Initialize vector so adjacent entries have alternating sign. //! template inline void hipblas_init_vector(host_vector& hx, const Arguments& arg, size_t N, size_t incx, hipblasStride stride_x, int batch_count, hipblas_check_nan_init nan_init, bool seedReset = false, bool alternating_sign = false) { if(seedReset) hipblas_seedrand(); if(nan_init == hipblas_client_alpha_sets_nan && hipblas_isnan(arg.alpha)) { hipblas_init_nan(hx, 1, N, incx, stride_x, batch_count); } else if(nan_init == hipblas_client_beta_sets_nan && hipblas_isnan(arg.beta)) { hipblas_init_nan(hx, 1, N, incx, stride_x, batch_count); } else if(arg.initialization == hipblas_initialization::hpl) { if(alternating_sign) hipblas_init_hpl_alternating_sign(hx, 1, N, incx, stride_x, batch_count); else hipblas_init_hpl(hx, 1, N, incx, stride_x, batch_count); } else if(arg.initialization == hipblas_initialization::rand_int) { if(alternating_sign) hipblas_init_alternating_sign(hx, 1, N, incx, stride_x, batch_count); else hipblas_init(hx, 1, N, incx, stride_x, batch_count); } else if(arg.initialization == hipblas_initialization::trig_float) { if(seedReset) hipblas_init_cos(hx, 1, N, incx, stride_x, batch_count); else hipblas_init_sin(hx, 1, N, incx, stride_x, batch_count); } } //! //! @brief Initialize a host_batch_vector. //! @param hx The host_batch_vector. //! @param arg Specifies the argument class. //! @param nan_init Initialize vector with Nan's depending upon the hipblas_check_nan_init enum value. //! @param seedReset reset the seed if true, do not reset the seed otherwise. Use init_cos if seedReset is true else use init_sin. //! @param alternating_sign Initialize vector so adjacent entries have alternating sign. //! 
template inline void hipblas_init_vector(host_batch_vector& hx, const Arguments& arg, hipblas_check_nan_init nan_init, bool seedReset = false, bool alternating_sign = false) { if(nan_init == hipblas_client_alpha_sets_nan && hipblas_isnan(arg.alpha)) { hipblas_init_nan(hx, seedReset); } else if(nan_init == hipblas_client_beta_sets_nan && hipblas_isnan(arg.beta)) { hipblas_init_nan(hx, seedReset); } else if(arg.initialization == hipblas_initialization::hpl) { hipblas_init_hpl(hx, seedReset, alternating_sign); } else if(arg.initialization == hipblas_initialization::rand_int) { hipblas_init(hx, seedReset, alternating_sign); } else if(arg.initialization == hipblas_initialization::trig_float) { hipblas_init_trig(hx, seedReset); } } //! //! @brief Initialize a host matrix. //! @param hA The host matrix. //! @param arg Specifies the argument class. //! @param M Length of the host matrix. //! @param N Length of the host matrix. //! @param lda Leading dimension of the host matrix. //! @param stride_A Incement between the host matrix. //! @param batch_count number of instances in the batch. //! @param nan_init Initialize matrix with Nan's depending upon the hipblas_check_nan_init enum value. //! @param seedReset reset the seed if true, do not reset the seed otherwise. Use init_cos if seedReset is true else use init_sin. //! @param alternating_sign Initialize matrix so adjacent entries have alternating sign. //! template inline void hipblas_init_matrix(host_vector& hA, const Arguments& arg, size_t M, size_t N, size_t lda, hipblasStride stride_A, int batch_count, hipblas_check_nan_init nan_init, bool seedReset = false, bool alternating_sign = false) { if(seedReset) hipblas_seedrand(); if(nan_init == hipblas_client_alpha_sets_nan && hipblas_isnan(arg.alpha)) { hipblas_init_nan(hA, M, N, lda, stride_A, batch_count); } else if(nan_init == hipblas_client_beta_sets_nan && hipblas_isnan(arg.beta)) { hipblas_init_nan(hA, M, N, lda, stride_A, batch_count); } else if(arg.initialization == hipblas_initialization::hpl) { if(alternating_sign) hipblas_init_hpl_alternating_sign(hA, M, N, lda, stride_A, batch_count); else hipblas_init_hpl(hA, M, N, lda, stride_A, batch_count); } else if(arg.initialization == hipblas_initialization::rand_int) { if(alternating_sign) hipblas_init_alternating_sign(hA, M, N, lda, stride_A, batch_count); else hipblas_init(hA, M, N, lda, stride_A, batch_count); } else if(arg.initialization == hipblas_initialization::trig_float) { if(seedReset) hipblas_init_cos(hA, M, N, lda, stride_A, batch_count); else hipblas_init_sin(hA, M, N, lda, stride_A, batch_count); } } //! //! @brief Template for initializing a host (non_batched|batched|strided_batched)vector. //! @param that That vector. //! @param rand_gen The random number generator for odd elements //! @param rand_gen_alt The random number generator for even elements //! @param seedReset Reset the seed if true, do not reset the seed otherwise. //! 
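These initializers all follow the same decision chain: fill with NaN when the test's alpha/beta is NaN (so NaN propagation can be verified), otherwise use the generator named by arg.initialization (hpl, rand_int or trig_float). A typical call for a small column-major matrix, with illustrative sizes and an Arguments instance `arg` assumed to be in scope, looks like:

// Sketch: initialize an M x N host matrix according to the Arguments settings.
int M = 3, N = 3, lda = 4;
host_vector<double> hA(size_t(lda) * N);
hipblas_init_matrix(hA, arg, M, N, lda,
                    /*stride_A=*/0, /*batch_count=*/1,
                    hipblas_client_beta_sets_nan,
                    /*seedReset=*/true, /*alternating_sign=*/false);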
template void hipblas_init_alternating_template(U& that, T rand_gen(), T rand_gen_alt(), bool seedReset) { if(seedReset) hipblas_seedrand(); for(int b = 0; b < that.batch_count(); ++b) { auto* batched_data = that[b]; ptrdiff_t inc = that.inc(); auto n = that.n(); if(inc < 0) batched_data -= (n - 1) * inc; for(int i = 0; i < n; ++i) { if(i % 2) batched_data[i * inc] = rand_gen(); else batched_data[i * inc] = rand_gen_alt(); } } } template void hipblas_init_alternating_sign(host_batch_vector& that, bool seedReset = false) { hipblas_init_alternating_template( that, random_generator, random_generator_negative, seedReset); } #endif hipBLAS-rocm-5.5.1/clients/include/host_batch_vector.hpp000066400000000000000000000155071434647641600232260ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ // #pragma once #include #include // // Local declaration of the device batch vector. // template class device_batch_vector; //! //! @brief Implementation of the batch vector on host. //! template class host_batch_vector { public: //! //! @brief Delete copy constructor. //! host_batch_vector(const host_batch_vector& that) = delete; //! //! @brief Delete copy assignement. //! host_batch_vector& operator=(const host_batch_vector& that) = delete; //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param batch_count The batch count. //! explicit host_batch_vector(int n, int inc, int batch_count) : m_n(n) , m_inc(inc ? inc : 1) , m_batch_count(batch_count) { if(false == this->try_initialize_memory()) { this->free_memory(); } } //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param stride (UNUSED) The stride. //! @param batch_count The batch count. //! explicit host_batch_vector(int n, int inc, hipblasStride stride, int batch_count) : host_batch_vector(n, inc, batch_count) { } //! //! @brief Destructor. //! ~host_batch_vector() { this->free_memory(); } //! //! @brief Returns the length of the vector. //! int n() const { return this->m_n; } //! //! @brief Returns the increment of the vector. //! int inc() const { return this->m_inc; } //! //! @brief Returns the batch count. //! int batch_count() const { return this->m_batch_count; } //! //! @brief Returns the stride value. 
//! hipblasStride stride() const { return 0; } //! //! @brief Random access to the vectors. //! @param batch_index the batch index. //! @return The mutable pointer. //! T* operator[](int batch_index) { return this->m_data[batch_index]; } //! //! @brief Constant random access to the vectors. //! @param batch_index the batch index. //! @return The non-mutable pointer. //! const T* operator[](int batch_index) const { return this->m_data[batch_index]; } //! //! @brief Cast to a double pointer. //! // clang-format off operator T**() // clang-format on { return this->m_data; } //! //! @brief Constant cast to a double pointer. //! operator const T* const *() { return this->m_data; } //! //! @brief Copy from a host batched vector. //! @param that the vector the data is copied from. //! @return true if the copy is done successfully, false otherwise. //! bool copy_from(const host_batch_vector& that) { if((this->batch_count() == that.batch_count()) && (this->n() == that.n()) && (this->inc() == that.inc())) { size_t num_bytes = this->n() * std::abs(this->inc()) * sizeof(T); for(int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { memcpy((*this)[batch_index], that[batch_index], num_bytes); } return true; } else { return false; } } //! //! @brief Transfer from a device batched vector. //! @param that the vector the data is copied from. //! @return the hip error. //! hipError_t transfer_from(const device_batch_vector& that) { hipError_t hip_err; size_t num_bytes = size_t(this->m_n) * std::abs(this->m_inc) * sizeof(T); for(int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { if(hipSuccess != (hip_err = hipMemcpy( (*this)[batch_index], that[batch_index], num_bytes, hipMemcpyDeviceToHost))) { return hip_err; } } return hipSuccess; } //! //! @brief Check if memory exists. //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. //! hipError_t memcheck() const { return (nullptr != this->m_data) ? hipSuccess : hipErrorOutOfMemory; } private: int m_n{}; int m_inc{}; int m_batch_count{}; T** m_data{}; bool try_initialize_memory() { bool success = (nullptr != (this->m_data = (T**)calloc(this->m_batch_count, sizeof(T*)))); if(success) { size_t nmemb = size_t(this->m_n) * std::abs(this->m_inc); for(int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { success = (nullptr != (this->m_data[batch_index] = (T*)calloc(nmemb, sizeof(T)))); if(false == success) { break; } } } return success; } void free_memory() { if(nullptr != this->m_data) { for(int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { if(nullptr != this->m_data[batch_index]) { free(this->m_data[batch_index]); this->m_data[batch_index] = nullptr; } } free(this->m_data); this->m_data = nullptr; } } }; hipBLAS-rocm-5.5.1/clients/include/near.h000066400000000000000000000110541434647641600201040ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
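host_batch_vector allocates one host array per batch instance and exposes them through operator[]; copy_from performs a deep per-batch memcpy, and memcheck reports whether the underlying calloc calls succeeded. A usage sketch with made-up sizes:

// Illustrative only.
static void example_batched_host_vectors()
{
    host_batch_vector<float> hx(/*n=*/64, /*inc=*/1, /*batch_count=*/4);
    host_batch_vector<float> hy(64, 1, 4);
    if(hx.memcheck() != hipSuccess || hy.memcheck() != hipSuccess)
        return;                 // host allocation failed

    hipblas_init(hx);           // batched random init from hipblas_vector.hpp
    bool ok = hy.copy_from(hx); // false if the two shapes do not match
    float first = hy[0][0];     // element 0 of batch 0
    (void)ok;
    (void)first;
}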
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #pragma once #ifndef _NEAR_H #define _NEAR_H #include "hipblas.h" #include "hipblas_vector.hpp" #ifdef GOOGLE_TEST #include "gtest/gtest.h" #endif /* ===================================================================== Google Unit check: ASSERT_EQ( elementof(A), elementof(B)) =================================================================== */ /*!\file * \brief compares two results (usually, CPU and GPU results); provides Google Unit check. */ /* ========================================Gtest Unit Check * ==================================================== */ // sqrt(0.5) factor for complex cutoff calculations constexpr double sqrthalf = 0.7071067811865475244; /*! 
\brief Template: gtest near compare two matrices float/double/complex */ template void near_check_general(int M, int N, int lda, T* hCPU, T* hGPU, double abs_error); template void near_check_general( int M, int N, int lda, host_vector hCPU, host_vector hGPU, double abs_error); template void near_check_general(int M, int N, int batch_count, int lda, hipblasStride stride_A, T* hCPU, T* hGPU, double abs_error); template void near_check_general( int M, int N, int batch_count, int lda, T** hCPU, T** hGPU, double abs_error); template void near_check_general(int M, int N, int batch_count, int lda, host_vector hCPU[], host_vector hGPU[], double abs_error); // currently only used for half-precision comparisons int dot_ex tests template HIPBLAS_CLANG_STATIC constexpr double error_tolerance = 0.0; // 2 ^ -14, smallest positive normal number for IEEE16 template <> HIPBLAS_CLANG_STATIC constexpr double error_tolerance = 0.000061035; template static constexpr double sum_error_tolerance_for_gfx11 = 0.0; template <> HIPBLAS_CLANG_STATIC constexpr double sum_error_tolerance_for_gfx11 = 1 / 10000.0; template <> HIPBLAS_CLANG_STATIC constexpr double sum_error_tolerance_for_gfx11 = 1 / 10000.0; template <> HIPBLAS_CLANG_STATIC constexpr double sum_error_tolerance_for_gfx11 = 1 / 10000.0; template <> HIPBLAS_CLANG_STATIC constexpr double sum_error_tolerance_for_gfx11 = 1 / 10000.0; template <> HIPBLAS_CLANG_STATIC constexpr double sum_error_tolerance_for_gfx11 = 1 / 10000.0; template <> HIPBLAS_CLANG_STATIC constexpr double sum_error_tolerance_for_gfx11 = 1 / 10000.0; template <> HIPBLAS_CLANG_STATIC constexpr double sum_error_tolerance_for_gfx11 = 1 / 1000000.0; #endif hipBLAS-rocm-5.5.1/clients/include/norm.h000066400000000000000000000145351434647641600201410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
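near.h provides the absolute-tolerance comparisons used where bit-exact checks are too strict (reduced precision, long reductions). The template arguments of the specializations above are not visible in this dump; assuming the 2^-14 value is the half-precision specialization, a dot-style comparison scaled by the reduction length would look roughly like this (h_cpu_result and h_gpu_result are assumed pointers to single comparable values):

// Sketch: compare one scalar result with a length-scaled half-precision tolerance.
double tol = error_tolerance<hipblasHalf> * N;
near_check_general<hipblasHalf>(1, 1, 1, h_cpu_result, h_gpu_result, tol);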
* * ************************************************************************ */ #pragma once #ifndef _NORM_H #define _NORM_H #include "hipblas.h" #include "hipblas_vector.hpp" /* ===================================================================== Norm check: norm(A-B)/norm(A), evaluate relative error =================================================================== */ /*!\file * \brief compares two results (usually, CPU and GPU results); provides Norm check */ /* ========================================Norm Check * ==================================================== */ /*! \brief Template: norm check for general Matrix: float/doubel/complex */ // see check_norm.cpp for template speciliazation // use auto as the return type is only allowed in c++14 // convert float/float to double template double norm_check_general(char norm_type, int M, int N, int lda, T* hCPU, T* hGPU); /*! \brief Template: norm check for hermitian/symmetric Matrix: float/double/complex */ template double norm_check_symmetric(char norm_type, char uplo, int N, int lda, T* hCPU, T* hGPU); template double norm_check_general(char norm_type, int M, int N, int lda, host_vector hCPU[], host_vector hGPU[], int batch_count) { // norm type can be O', 'I', 'F', 'o', 'i', 'f' for one, infinity or Frobenius norm // one norm is max column sum // infinity norm is max row sum // Frobenius is l2 norm of matrix entries // // use triangle inequality ||a+b|| <= ||a|| + ||b|| to calculate upper limit for Frobenius norm // of strided batched matrix double cumulative_error = 0.0; for(int i = 0; i < batch_count; i++) { auto index = i; auto error = norm_check_general(norm_type, M, N, lda, hCPU[index], hGPU[index]); if(norm_type == 'F' || norm_type == 'f') { cumulative_error += error; } else if(norm_type == 'O' || norm_type == 'o' || norm_type == 'I' || norm_type == 'i') { cumulative_error = cumulative_error > error ? cumulative_error : error; } } return cumulative_error; } /* ============== Norm Check for strided_batched case ============= */ template double norm_check_general( char norm_type, int M, int N, int lda, ptrdiff_t stride_a, T* hCPU, T* hGPU, int batch_count) { // norm type can be O', 'I', 'F', 'o', 'i', 'f' for one, infinity or Frobenius norm // one norm is max column sum // infinity norm is max row sum // Frobenius is l2 norm of matrix entries // // use triangle inequality ||a+b|| <= ||a|| + ||b|| to calculate upper limit for Frobenius norm // of strided batched matrix double cumulative_error = 0.0; for(size_t i = 0; i < batch_count; i++) { auto index = i * stride_a; auto error = norm_check_general(norm_type, M, N, lda, hCPU + index, hGPU + index); if(norm_type == 'F' || norm_type == 'f') { cumulative_error += error; } else if(norm_type == 'O' || norm_type == 'o' || norm_type == 'I' || norm_type == 'i') { cumulative_error = cumulative_error > error ? 
cumulative_error : error; } } return cumulative_error; } template double norm_check_general(char norm_type, int M, int N, int lda, host_batch_vector& hCPU, host_batch_vector& hGPU, int batch_count) { // norm type can be O', 'I', 'F', 'o', 'i', 'f' for one, infinity or Frobenius norm // one norm is max column sum // infinity norm is max row sum // Frobenius is l2 norm of matrix entries // // use triangle inequality ||a+b|| <= ||a|| + ||b|| to calculate upper limit for Frobenius norm // of strided batched matrix double cumulative_error = 0.0; for(int i = 0; i < batch_count; i++) { auto index = i; auto error = norm_check_general(norm_type, M, N, lda, hCPU[index], hGPU[index]); if(norm_type == 'F' || norm_type == 'f') { cumulative_error += error; } else if(norm_type == 'O' || norm_type == 'o' || norm_type == 'I' || norm_type == 'i') { cumulative_error = cumulative_error > error ? cumulative_error : error; } } return cumulative_error; } template double vector_norm_1(int M, int incx, T* hx_gold, T* hx) { double max_err_scal = 0.0; double max_err = 0.0; for(int i = 0; i < M; i++) { max_err += std::abs((hx_gold[i * incx] - hx[i * incx])); max_err_scal += std::abs(hx_gold[i * incx]); } if(std::abs(max_err_scal) < 1e6) max_err_scal = 1; return max_err / max_err_scal; } #endif hipBLAS-rocm-5.5.1/clients/include/program_options.hpp000066400000000000000000000427271434647641600227540ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
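All of the norm_check_general overloads return a relative error, ||ref - gpu|| / ||ref||, accumulated across batches (summed for the Frobenius norm, max-reduced for the one/infinity norms). Typical use in a test body, with hypothetical hC_ref / hC_gpu host buffers:

// Sketch: Frobenius-norm relative error of an M x N result matrix.
double err = norm_check_general<float>('F', M, N, lda, hC_ref.data(), hC_gpu.data());
// err is then compared against a tolerance chosen by the individual test.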
* * ************************************************************************ */ // This emulates the required functionality of boost::program_options #pragma once #include #include #include #include #include #include #include #include #include #include #include namespace roc { // Regular expression for token delimiters (whitespace and commas) static const std::regex program_options_regex{"[, \\f\\n\\r\\t\\v]+", std::regex_constants::optimize}; // Polymorphic base class to use with dynamic_cast class value_base { protected: bool m_has_actual = false; bool m_has_default = false; public: virtual ~value_base() = default; bool has_actual() const { return m_has_actual; } bool has_default() const { return m_has_default; } }; // Value parameters template class value : public value_base { T m_var; // Variable to be modified if no pointer provided T* m_var_ptr; // Pointer to variable to be modified public: // Constructor explicit value() : m_var_ptr(nullptr) { } explicit value(T var, bool defaulted) : m_var(var) , m_var_ptr(nullptr) { m_has_actual = !defaulted; m_has_default = defaulted; } explicit value(T* var_ptr) : m_var_ptr(var_ptr) { } // Allows actual_value() and default_value() value* operator->() { return this; } // Get the value const T& get_value() const { if(m_var_ptr) return *m_var_ptr; else return m_var; } // Set actual value value& actual_value(T val) { if(m_var_ptr) *m_var_ptr = std::move(val); else m_var = std::move(val); m_has_actual = true; return *this; } // Set default value value& default_value(T val) { if(!m_has_actual) { if(m_var_ptr) *m_var_ptr = std::move(val); else m_var = std::move(val); m_has_default = true; } return *this; } }; // bool_switch is a value, which is handled specially using bool_switch = value; class variable_value { std::shared_ptr m_val; public: // Constructor explicit variable_value() = default; template explicit variable_value(const T& xv, bool xdefaulted) : m_val(std::make_shared>(xv, xdefaulted)) { } explicit variable_value(std::shared_ptr val) : m_val(val) { } // Member functions bool empty() const { return !m_val.get() || (!m_val->has_actual() && !m_val->has_default()); } bool defaulted() const { return m_val.get() && !m_val->has_actual() && m_val->has_default(); } template const T& as() const { if(value* val = dynamic_cast*>(m_val.get())) return val->get_value(); else throw std::logic_error("Internal error: Invalid cast"); } }; using variables_map = std::map; class options_description { // desc_option describes a particular option class desc_option { std::string m_opts; std::shared_ptr m_val; std::string m_desc; public: // Constructor with options, value and description template desc_option(std::string opts, value val, std::string desc) : m_opts(std::move(opts)) , m_val(new auto(std::move(val))) , m_desc(std::move(desc)) { } // Constructor with options and description desc_option(std::string opts, std::string desc) : m_opts(std::move(opts)) , m_val(nullptr) , m_desc(std::move(desc)) { } // Copy constructor is deleted desc_option(const desc_option&) = delete; // Move constructor desc_option(desc_option&& other) = default; // Accessors const std::string& get_opts() const { return m_opts; } const std::shared_ptr get_val() const { return m_val; } const std::string& get_desc() const { return m_desc; } // Set a value void set_val(int& argc, char**& argv, std::string inopt) const { // We test all supported types with dynamic_cast and parse accordingly bool match = false; if(auto* ptr = dynamic_cast*>(m_val.get())) { int32_t val; match = argc && 
sscanf(*argv, "%" SCNd32, &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { uint32_t val; match = argc && sscanf(*argv, "%" SCNu32, &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { int64_t val; match = argc && sscanf(*argv, "%" SCNd64, &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { uint64_t val; match = argc && sscanf(*argv, "%" SCNu64, &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { float val; match = argc && sscanf(*argv, "%f", &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { double val; match = argc && sscanf(*argv, "%lf", &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { char val; match = argc && sscanf(*argv, " %c", &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { // We handle bool specially, setting the value to true without argument ptr->actual_value(true); return; } else if(auto* ptr = dynamic_cast*>(m_val.get())) { if(argc) { ptr->actual_value(*argv); match = true; } } else { throw std::logic_error("Internal error: Unsupported data type"); } if(!match) throw std::invalid_argument(argc ? "Invalid value for " + inopt : "Missing required value for " + inopt); // Skip past the argument's value ++argv; --argc; } }; // Description and option list std::string m_desc; std::vector m_optlist; // desc_optionlist allows chains of options to be parenthesized class desc_optionlist { std::vector& m_list; public: explicit desc_optionlist(std::vector& list) : m_list(list) { } template desc_optionlist operator()(Ts&&... arg) { m_list.push_back(desc_option(std::forward(arg)...)); return *this; } }; // Parse an option at the current (argc, argv) position void parse_option(int& argc, char**& argv, variables_map& vm, bool ignoreUnknown) const { // Iterate across all options for(const auto& opt : m_optlist) { // Canonical name used for map std::string canonical_name; // Iterate across tokens in the opts for(std::sregex_token_iterator tok{ opt.get_opts().begin(), opt.get_opts().end(), program_options_regex, -1}; tok != std::sregex_token_iterator(); ++tok) { // The first option in a list of options is the canonical name if(!canonical_name.length()) canonical_name = tok->str(); // If the length of the option is 1, it is single-dash; otherwise double-dash const char* prefix = tok->length() == 1 ? 
"-" : "--"; // If option matches if(*argv == prefix + tok->str()) { ++argv; --argc; // If option has a value, set it if(opt.get_val().get()) opt.set_val(argc, argv, prefix + tok->str()); // Add seen options to map vm[canonical_name] = variable_value(opt.get_val()); return; // Return successfully } } } // No options were matched if(ignoreUnknown) { ++argv; --argc; } else throw std::invalid_argument("Option " + std::string(argv[0]) + " is not defined."); } public: // Constructor explicit options_description(std::string desc) : m_desc(std::move(desc)) { } // Start a desc_optionlist chain desc_optionlist add_options() & { return desc_optionlist(m_optlist); } // Parse all options void parse_options(int& argc, char**& argv, variables_map& vm, bool ignoreUnknown = false) const { // Add options with default values to map for(const auto& opt : m_optlist) { std::sregex_token_iterator tok{ opt.get_opts().begin(), opt.get_opts().end(), program_options_regex, -1}; // Canonical name used for map std::string canonical_name = tok->str(); if(opt.get_val().get() && opt.get_val()->has_default()) vm[canonical_name] = variable_value(opt.get_val()); } // Parse options while(argc) parse_option(argc, argv, vm, ignoreUnknown); } // Formatted output of command-line arguments description friend std::ostream& operator<<(std::ostream& os, const options_description& d) { // Iterate across all options for(const auto& opt : d.m_optlist) { bool first = true, printvalue = true; const char* delim = ""; std::ostringstream left; // Iterate across tokens in the opts for(std::sregex_token_iterator tok{opt.get_opts().begin(), opt.get_opts().end(), program_options_regex, -1}; tok != std::sregex_token_iterator(); ++tok, first = false, delim = " ") { // If the length of the option is 1, it is single-dash; otherwise double-dash const char* prefix = tok->length() == 1 ? "-" : "--"; left << delim << (first ? 
"" : "|") << prefix << tok->str(); if(tok->str() == "help" || tok->str() == "h") printvalue = false; } if(printvalue) left << " "; os << std::setw(26) << std::left << left.str() << " " << opt.get_desc() << " "; left.str(std::string()); // Print the default value of the variable type if it exists // We do not print the default value for bool const value_base* val = opt.get_val().get(); if(val && !dynamic_cast*>(val)) { if(val->has_default()) { // We test all supported types with dynamic_cast and print accordingly left << " (Default value is: "; if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else throw std::logic_error("Internal error: Unsupported data type"); left << ")"; } } os << left.str() << "\n\n"; } return os << std::flush; } }; // Class representing command line parser class parse_command_line { variables_map m_vm; public: parse_command_line(int argc, char** argv, const options_description& desc, bool ignoreUnknown = false) { ++argv; // Skip argv[0] --argc; desc.parse_options(argc, argv, m_vm, ignoreUnknown); } // Copy the variables_map friend void store(const parse_command_line& p, variables_map& vm) { vm = p.m_vm; } // Move the variables_map friend void store(parse_command_line&& p, variables_map& vm) { vm = std::move(p.m_vm); } }; // We can define the notify() function as a no-op for our purposes inline void notify(const variables_map&) {} } hipBLAS-rocm-5.5.1/clients/include/syrkx_reference.hpp000066400000000000000000000074701434647641600227240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" // reference implementation of syrkx. This function is not part of legacy BLAS. 
template void syrkx_reference(hipblasFillMode_t uplo, hipblasOperation_t trans, int n, int k, T alpha, T* a, int lda, T* b, int ldb, T beta, T* c, int ldc) { int a_s1 = HIPBLAS_OP_N == trans ? 1 : lda; int a_s2 = HIPBLAS_OP_N == trans ? lda : 1; int b_s1 = HIPBLAS_OP_N == trans ? 1 : ldb; int b_s2 = HIPBLAS_OP_N == trans ? ldb : 1; int c_s1 = 1; int c_s2 = ldc; // argument error int nrow = trans == HIPBLAS_OP_N ? n : k; if(n < 0) { std::cout << "ERROR: syrkx_reference n < 0" << std::endl; return; } if(k < 0) { std::cout << "ERROR: syrk_reference k < 0" << std::endl; return; } if(n > ldc) { std::cout << "ERROR: syrk_reference n > ldc" << std::endl; return; } if(nrow > lda) { std::cout << "ERROR: syrk_reference nrow > lda" << std::endl; return; } if(nrow > ldb) { std::cout << "ERROR: syrk_reference nrow > ldb" << std::endl; return; } // quick return if((n == 0) || (((alpha == 0) || (k == 0)) && (beta == 1))) return; // rank kx update with special cases for alpha == 0, beta == 0 for(int i1 = 0; i1 < n; i1++) { int i2_start = HIPBLAS_FILL_MODE_LOWER == uplo ? 0 : i1; int i2_end = HIPBLAS_FILL_MODE_LOWER == uplo ? i1 + 1 : n; for(int i2 = i2_start; i2 < i2_end; i2++) { if(alpha == 0 && beta == 0) { c[i1 * c_s1 + i2 * c_s2] = 0.0; } else if(alpha == 0) { c[i1 * c_s1 + i2 * c_s2] *= beta; } else { T t = 0; for(int i3 = 0; i3 < k; i3++) { t += a[i1 * a_s1 + i3 * a_s2] * b[i2 * b_s1 + i3 * b_s2]; } c[i1 * c_s1 + i2 * c_s2] = beta * c[i1 * c_s1 + i2 * c_s2] + alpha * t; } } } return; } hipBLAS-rocm-5.5.1/clients/include/test_cleanup.hpp000066400000000000000000000043201434647641600222030ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ************************************************************************ */ #pragma once #include #include #include /*! \brief Test cleanup handler. Frees memory or performs other cleanup at specified points in program. */ class test_cleanup { static auto& stack() { // Placed inside function to avoid dependency on initialization order static std::stack> stack; return stack; } public: // Run all cleanup handlers pushed so far, in LIFO order static void cleanup() { while(!stack().empty()) { stack().top()(); stack().pop(); } } // Create an object and register a cleanup handler template static T* allocate(T** ptr, Args&&... 
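syrkx_reference computes C := alpha * op(A) * op(B)^T + beta * C on the selected triangle of C, since syrkx has no legacy-BLAS routine to fall back on. A small illustrative call (sizes and data are placeholders of this sketch):

// Sketch: validate a lower-triangular rank-k update, n = 4, k = 2, op = N.
int n = 4, k = 2, lda = 4, ldb = 4, ldc = 4;
std::vector<float> A(size_t(lda) * k), B(size_t(ldb) * k), C(size_t(ldc) * n, 0.0f);
// ... fill A and B with the same data handed to the hipBLAS syrkx call under test ...
syrkx_reference<float>(HIPBLAS_FILL_MODE_LOWER, HIPBLAS_OP_N,
                       n, k, /*alpha=*/1.0f,
                       A.data(), lda, B.data(), ldb,
                       /*beta=*/0.0f, C.data(), ldc);
// Only the lower triangle of C is written; the strict upper triangle is left untouched.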
args) { *ptr = nullptr; stack().push([=] { delete *ptr; *ptr = nullptr; }); return new T(std::forward(args)...); } }; hipBLAS-rocm-5.5.1/clients/include/testing_asum.hpp000066400000000000000000000136711434647641600222300ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAsumModel = ArgumentModel; inline void testname_asum(const Arguments& arg, std::string& name) { hipblasAsumModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_asum(const Arguments& arg) { using Tr = real_t; bool FORTRAN = arg.fortran; auto hipblasAsumFn = FORTRAN ? hipblasAsum : hipblasAsum; int N = arg.N; int incx = arg.incx; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0) { device_vector d_hipblas_result_0(1); host_vector h_hipblas_result_0(1); hipblas_init_nan(h_hipblas_result_0.data(), 1); CHECK_HIP_ERROR( hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAsumFn(handle, N, nullptr, incx, d_hipblas_result_0)); host_vector cpu_0(1); host_vector gpu_0(1); CHECK_HIP_ERROR(hipMemcpy(gpu_0, d_hipblas_result_0, sizeof(Tr), hipMemcpyDeviceToHost)); unit_check_general(1, 1, 1, cpu_0, gpu_0); return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = size_t(N) * incx; // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); device_vector dx(sizeX); device_vector d_hipblas_result(1); Tr cpu_result, hipblas_result_host, hipblas_result_device; double gpu_time_used, hipblas_error_host = 0, hipblas_error_device = 0; // Initial Data on CPU hipblas_init_vector(hx, arg, N, incx, 0, 1, hipblas_client_alpha_sets_nan, true); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * N * incx, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ // hipblasAsum accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAsumFn(handle, N, dx, incx, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAsumFn(handle, N, dx, incx, &hipblas_result_host)); CHECK_HIP_ERROR( hipMemcpy(&hipblas_result_device, d_hipblas_result, sizeof(Tr), hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_asum(N, hx.data(), incx, &cpu_result); if(arg.unit_check) { unit_check_general(1, 1, 1, &cpu_result, &hipblas_result_host); unit_check_general(1, 1, 1, &cpu_result, &hipblas_result_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('M', 1, 1, 1, &cpu_result, &hipblas_result_host); hipblas_error_device = norm_check_general('M', 1, 1, 1, &cpu_result, &hipblas_result_device); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasAsumFn(handle, N, dx, incx, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAsumModel{}.log_args(std::cout, arg, gpu_time_used, asum_gflop_count(N), asum_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_asum_batched.hpp000066400000000000000000000154751434647641600237060ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAsumBatchedModel = ArgumentModel; inline void testname_asum_batched(const Arguments& arg, std::string& name) { hipblasAsumBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_asum_batched(const Arguments& arg) { using Tr = real_t; bool FORTRAN = arg.fortran; auto hipblasAsumBatchedFn = FORTRAN ? hipblasAsumBatched : hipblasAsumBatched; int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(1, batch_count)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasAsumBatchedFn(handle, N, nullptr, incx, batch_count, d_hipblas_result_0)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_vector h_hipblas_result_host(batch_count); host_vector h_hipblas_result_device(batch_count); host_vector h_cpu_result(batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_hipblas_result(batch_count); CHECK_HIP_ERROR(dx.memcheck()); // Initial Data on CPU hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); CHECK_HIP_ERROR(dx.transfer_from(hx)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { // hipblasAsum accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAsumBatchedFn( handle, N, dx.ptr_on_device(), incx, batch_count, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAsumBatchedFn( handle, N, dx.ptr_on_device(), incx, batch_count, h_hipblas_result_host)); CHECK_HIP_ERROR(hipMemcpy(h_hipblas_result_device, d_hipblas_result, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_asum(N, hx[b], incx, &(h_cpu_result[b])); } if(arg.unit_check) { unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result_host); unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, batch_count, 1, h_cpu_result, h_hipblas_result_host); hipblas_error_device = norm_check_general( 'F', 1, batch_count, 1, h_cpu_result, h_hipblas_result_device); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasAsumBatchedFn( handle, N, dx.ptr_on_device(), incx, batch_count, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAsumBatchedModel{}.log_args(std::cout, arg, gpu_time_used, asum_gflop_count(N), asum_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_asum_strided_batched.hpp000066400000000000000000000163771434647641600254260ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAsumStridedBatchedModel = ArgumentModel; inline void testname_asum_strided_batched(const Arguments& arg, std::string& name) { hipblasAsumStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_asum_strided_batched(const Arguments& arg) { using Tr = real_t; bool FORTRAN = arg.fortran; auto hipblasAsumStridedBatchedFn = FORTRAN ? hipblasAsumStridedBatched : hipblasAsumStridedBatched; int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride stridex = size_t(N) * incx * stride_scale; size_t sizeX = stridex * batch_count; double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(1, batch_count)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAsumStridedBatchedFn( handle, N, nullptr, incx, stridex, batch_count, d_hipblas_result_0)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector cpu_result(batch_count); host_vector hipblas_result_host(batch_count); host_vector hipblas_result_device(batch_count); device_vector dx(sizeX); device_vector d_hipblas_result(batch_count); // Initial Data on CPU hipblas_init_vector( hx, arg, N, incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ // hipblasAsum accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAsumStridedBatchedFn( handle, N, dx, incx, stridex, batch_count, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAsumStridedBatchedFn( handle, N, dx, incx, stridex, batch_count, hipblas_result_host)); CHECK_HIP_ERROR(hipMemcpy(hipblas_result_device, d_hipblas_result, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_asum(N, hx.data() + b * stridex, incx, &cpu_result[b]); } if(arg.unit_check) { unit_check_general(1, batch_count, 1, cpu_result, hipblas_result_host); unit_check_general(1, batch_count, 1, cpu_result, hipblas_result_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, batch_count, 1, cpu_result, hipblas_result_host); hipblas_error_device = norm_check_general('F', 1, batch_count, 1, cpu_result, hipblas_result_device); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasAsumStridedBatchedFn( handle, N, dx, incx, stridex, batch_count, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAsumStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, asum_gflop_count(N), asum_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_axpy.hpp000066400000000000000000000145411434647641600222410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAxpyModel = ArgumentModel; inline void testname_axpy(const Arguments& arg, std::string& name) { hipblasAxpyModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_axpy(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasAxpyFn = FORTRAN ? hipblasAxpy : hipblasAxpy; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0) { CHECK_HIPBLAS_ERROR(hipblasAxpyFn(handle, N, nullptr, nullptr, incx, nullptr, incy)); return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = size_t(N) * abs_incx; size_t sizeY = size_t(N) * abs_incy; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; T alpha = arg.get_alpha(); // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy_host(sizeY); host_vector hy_device(sizeY); host_vector hx_cpu(sizeX); host_vector hy_cpu(sizeY); device_vector dx(sizeX); device_vector dy_host(sizeY); device_vector dy_device(sizeY); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy_host, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan, false); hy_device = hy_host; // copy vector is easy in STL; hx_cpu = hx: save a copy in hx_cpu which will be output of CPU BLAS hx_cpu = hx; hy_cpu = hy_host; CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy_host, hy_host.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); CHECK_HIP_ERROR( hipMemcpy(dy_device, hy_device.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAxpyFn(handle, N, d_alpha, dx, incx, dy_device, incy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAxpyFn(handle, N, &alpha, dx, incx, dy_host, incy)); // copy output from device to CPU CHECK_HIP_ERROR( hipMemcpy(hy_host.data(), dy_host, sizeof(T) * sizeY, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hy_device.data(), dy_device, sizeof(T) * sizeY, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS 
=================================================================== */ cblas_axpy(N, alpha, hx_cpu.data(), incx, hy_cpu.data(), incy); if(arg.unit_check) { unit_check_general(1, N, abs_incy, hy_cpu.data(), hy_host.data()); unit_check_general(1, N, abs_incy, hy_cpu.data(), hy_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu.data(), hy_host.data()); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu.data(), hy_device.data()); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasAxpyFn(handle, N, d_alpha, dx, incx, dy_device, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAxpyModel{}.log_args(std::cout, arg, gpu_time_used, axpy_gflop_count(N), axpy_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_axpy_batched.hpp000066400000000000000000000171761434647641600237220ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAxpyBatchedModel = ArgumentModel; inline void testname_axpy_batched(const Arguments& arg, std::string& name) { hipblasAxpyBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_axpy_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasAxpyBatchedFn = FORTRAN ? hipblasAxpyBatched : hipblasAxpyBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; int abs_incy = incy < 0 ? 
-incy : incy; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR( hipblasAxpyBatchedFn(handle, N, nullptr, nullptr, incx, nullptr, incy, batch_count)); return HIPBLAS_STATUS_SUCCESS; } T alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_batch_vector hy_host(N, incy, batch_count); host_batch_vector hy_device(N, incy, batch_count); host_batch_vector hx_cpu(N, incx, batch_count); host_batch_vector hy_cpu(N, incy, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy_host(N, incy, batch_count); device_batch_vector dy_device(N, incy, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy_host.memcheck()); CHECK_HIP_ERROR(dy_device.memcheck()); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy_host, arg, hipblas_client_alpha_sets_nan, false); hy_device.copy_from(hy_host); hx_cpu.copy_from(hx); hy_cpu.copy_from(hy_host); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy_host.transfer_from(hy_host)); CHECK_HIP_ERROR(dy_device.transfer_from(hy_device)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAxpyBatchedFn(handle, N, d_alpha, dx.ptr_on_device(), incx, dy_device.ptr_on_device(), incy, batch_count)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAxpyBatchedFn(handle, N, &alpha, dx.ptr_on_device(), incx, dy_host.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_host.transfer_from(dy_host)); CHECK_HIP_ERROR(hy_device.transfer_from(dy_device)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_axpy(N, alpha, hx_cpu[b], incx, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device, batch_count); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasAxpyBatchedFn(handle, N, d_alpha, dx.ptr_on_device(), incx, dy_device.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAxpyBatchedModel{}.log_args(std::cout, arg, 
gpu_time_used, axpy_gflop_count(N), axpy_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_axpy_batched_ex.hpp000066400000000000000000000256571434647641600244210ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAxpyBatchedExModel = ArgumentModel; inline void testname_axpy_batched_ex(const Arguments& arg, std::string& name) { hipblasAxpyBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_axpy_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasAxpyBatchedExFn = FORTRAN ? hipblasAxpyBatchedExFortran : hipblasAxpyBatchedEx; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t yType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasAxpyBatchedExFn(handle, N, nullptr, alphaType, nullptr, xType, incx, nullptr, yType, incy, batch_count, executionType)); return HIPBLAS_STATUS_SUCCESS; } int abs_incy = incy < 0 ? -incy : incy; Ta h_alpha = arg.get_alpha(); // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_batch_vector hy_host(N, incy, batch_count); host_batch_vector hy_device(N, incy, batch_count); host_batch_vector hy_cpu(N, incy, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy_host, arg, hipblas_client_alpha_sets_nan, false); hy_device.copy_from(hy_host); hy_cpu.copy_from(hy_host); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(Ta), hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAxpyBatchedExFn(handle, N, &h_alpha, alphaType, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, batch_count, executionType)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAxpyBatchedExFn(handle, N, d_alpha, alphaType, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, batch_count, executionType)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_axpy(N, h_alpha, hx[b], incx, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device, batch_count); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasAxpyBatchedExFn(handle, N, d_alpha, alphaType, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, batch_count, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAxpyBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, axpy_gflop_count(N), axpy_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_axpy_batched_ex(Arguments arg) { hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t yType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(alphaType == HIPBLAS_R_16F && xType == 
HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_axpy_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { // Not testing accumulation here status = testing_axpy_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { // Not testing accumulation here status = testing_axpy_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_axpy_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_axpy_batched_ex_template(arg); } else if(alphaType == HIPBLAS_C_32F && xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_axpy_batched_ex_template(arg); } else if(alphaType == HIPBLAS_C_64F && xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_axpy_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_axpy_ex.hpp000066400000000000000000000216431434647641600227360ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAxpyExModel = ArgumentModel; inline void testname_axpy_ex(const Arguments& arg, std::string& name) { hipblasAxpyExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_axpy_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasAxpyExFn = FORTRAN ? 
hipblasAxpyExFortran : hipblasAxpyEx; int N = arg.N; int incx = arg.incx; int incy = arg.incy; hipblasLocalHandle handle(arg); hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t yType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0) { CHECK_HIPBLAS_ERROR(hipblasAxpyExFn(handle, N, nullptr, alphaType, nullptr, xType, incx, nullptr, yType, incy, executionType)); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; size_t sizeX = size_t(N) * abs_incx; size_t sizeY = size_t(N) * abs_incy; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; Ta h_alpha = arg.get_alpha(); // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy_host(sizeY); host_vector hy_device(sizeX); host_vector hy_cpu(sizeY); device_vector dx(sizeX); device_vector dy(sizeY); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy_host, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan, false); hy_device = hy_host; hy_cpu = hy_host; CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy_host, sizeof(Ty) * sizeY, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(Ta), hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAxpyExFn( handle, N, &h_alpha, alphaType, dx, xType, incx, dy, yType, incy, executionType)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hy_host, dy, sizeof(Ty) * sizeY, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy_device, sizeof(Ty) * sizeY, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAxpyExFn( handle, N, d_alpha, alphaType, dx, xType, incx, dy, yType, incy, executionType)); CHECK_HIP_ERROR(hipMemcpy(hy_device, dy, sizeof(Ty) * sizeY, hipMemcpyDeviceToHost)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU BLAS =================================================================== */ cblas_axpy(N, h_alpha, hx.data(), incx, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); 
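// Only the final arg.iters launches are timed: the first arg.cold_iters iterations act as warm-up,
// and the stream-synchronizing timer is read once iter reaches arg.cold_iters.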
CHECK_HIPBLAS_ERROR(hipblasAxpyExFn( handle, N, d_alpha, alphaType, dx, xType, incx, dy, yType, incy, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAxpyExModel{}.log_args(std::cout, arg, gpu_time_used, axpy_gflop_count(N), axpy_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_axpy_ex(Arguments arg) { hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t yType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_axpy_ex_template(arg); } else if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { // Not testing accumulation here status = testing_axpy_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { // Not testing accumulation here status = testing_axpy_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_axpy_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_axpy_ex_template(arg); } else if(alphaType == HIPBLAS_C_32F && xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_axpy_ex_template(arg); } else if(alphaType == HIPBLAS_C_64F && xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_axpy_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_axpy_strided_batched.hpp000066400000000000000000000170431434647641600254310ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAxpyStridedBatchedModel = ArgumentModel; inline void testname_axpy_strided_batched(const Arguments& arg, std::string& name) { hipblasAxpyStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_axpy_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasAxpyStridedBatchedFn = FORTRAN ? hipblasAxpyStridedBatched : hipblasAxpyStridedBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; T alpha = arg.get_alpha(); int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; hipblasStride stridex = size_t(N) * abs_incx * stride_scale; hipblasStride stridey = size_t(N) * abs_incy * stride_scale; size_t sizeX = stridex * batch_count; size_t sizeY = stridey * batch_count; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasAxpyStridedBatchedFn( handle, N, nullptr, nullptr, incx, stridex, nullptr, incy, stridey, batch_count)); return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy_host(sizeY); host_vector hy_device(sizeY); host_vector hx_cpu(sizeX); host_vector hy_cpu(sizeY); device_vector dx(sizeX); device_vector dy_host(sizeY); device_vector dy_device(sizeY); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hy_host, arg, N, abs_incy, stridey, batch_count, hipblas_client_alpha_sets_nan, false); hy_device = hy_host; // copy vector is easy in STL; hx_cpu = hx: save a copy in hx_cpu which will be output of CPU BLAS hx_cpu = hx; hy_cpu = hy_host; CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy_host, hy_host.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); CHECK_HIP_ERROR( hipMemcpy(dy_device, hy_device.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAxpyStridedBatchedFn( handle, N, d_alpha, dx, incx, stridex, dy_device, incy, stridey, batch_count)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAxpyStridedBatchedFn( handle, N, &alpha, dx, incx, stridex, dy_host, incy, stridey, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR( hipMemcpy(hy_host.data(), dy_host, sizeof(T) * sizeY, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hy_device.data(), dy_device, sizeof(T) * sizeY, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS 
=================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_axpy( N, alpha, hx_cpu.data() + b * stridex, incx, hy_cpu.data() + b * stridey, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general( 1, N, batch_count, abs_incy, stridex, hy_cpu.data(), hy_host.data()); unit_check_general( 1, N, batch_count, abs_incy, stridey, hy_cpu.data(), hy_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, N, abs_incy, stridey, hy_cpu.data(), hy_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', 1, N, abs_incy, stridey, hy_cpu.data(), hy_device.data(), batch_count); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasAxpyStridedBatchedFn( handle, N, d_alpha, dx, incx, stridex, dy_device, incy, stridey, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAxpyStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, axpy_gflop_count(N), axpy_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_axpy_strided_batched_ex.hpp000066400000000000000000000306401434647641600261230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasAxpyStridedBatchedExModel = ArgumentModel; inline void testname_axpy_strided_batched_ex(const Arguments& arg, std::string& name) { hipblasAxpyStridedBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_axpy_strided_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasAxpyStridedBatchedExFn = FORTRAN ? 
hipblasAxpyStridedBatchedExFortran : hipblasAxpyStridedBatchedEx; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; hipblasStride stridex = size_t(N) * abs_incx * stride_scale; hipblasStride stridey = size_t(N) * abs_incy * stride_scale; hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t yType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasAxpyStridedBatchedExFn(handle, N, nullptr, alphaType, nullptr, xType, incx, stridex, nullptr, yType, incy, stridey, batch_count, executionType)); return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = stridex * batch_count; size_t sizeY = stridey * batch_count; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; Ta h_alpha = arg.get_alpha(); // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy_cpu(sizeY); host_vector hy_host(sizeY); host_vector hy_device(sizeY); device_vector dx(sizeX); device_vector dy(sizeY); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hy_host, arg, N, abs_incy, stridey, batch_count, hipblas_client_alpha_sets_nan, false); hy_device = hy_host; hy_cpu = hy_host; CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy_host, sizeof(Ty) * sizeY, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(Ta), hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasAxpyStridedBatchedExFn(handle, N, &h_alpha, alphaType, dx, xType, incx, stridex, dy, yType, incy, stridey, batch_count, executionType)); CHECK_HIP_ERROR(hipMemcpy(hy_host, dy, sizeof(Ty) * sizeY, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy_device, sizeof(Ty) * sizeY, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasAxpyStridedBatchedExFn(handle, N, d_alpha, alphaType, dx, xType, incx, stridex, dy, yType, incy, stridey, batch_count, executionType)); CHECK_HIP_ERROR(hipMemcpy(hy_device, dy, sizeof(Ty) * sizeY, hipMemcpyDeviceToHost)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_axpy(N, h_alpha, hx.data() + b * stridex, incx, hy_cpu + b * stridey, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, stridey, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, stridey, hy_cpu, hy_device); } if(arg.norm_check) 
{ hipblas_error_host = norm_check_general( 'F', 1, N, abs_incy, stridey, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, N, abs_incy, stridey, hy_cpu, hy_device, batch_count); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasAxpyStridedBatchedExFn(handle, N, d_alpha, alphaType, dx, xType, incx, stridex, dy, yType, incy, stridey, batch_count, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasAxpyStridedBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, axpy_gflop_count(N), axpy_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_axpy_strided_batched_ex(Arguments arg) { hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t yType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_axpy_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { // Not testing accumulation here status = testing_axpy_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { // Not testing accumulation here status = testing_axpy_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_axpy_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_axpy_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_C_32F && xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_axpy_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_C_64F && xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_axpy_strided_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_common.hpp000066400000000000000000000033721434647641600225500ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #pragma once #ifndef _TESTING_COMMON_HPP_ #define _TESTING_COMMON_HPP_ // do not add special case includes here, keep those in the testing_ file #include "argument_model.hpp" #include "bytes.hpp" #include "cblas_interface.h" #include "flops.hpp" #include "hipblas.hpp" #ifndef WIN32 #include "hipblas_fortran.hpp" #else #include "hipblas_no_fortran.hpp" #endif #include "hipblas_vector.hpp" #include "near.h" #include "norm.h" #include "unit.h" #include "utility.h" #endif hipBLAS-rocm-5.5.1/clients/include/testing_copy.hpp000066400000000000000000000124471434647641600222350ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasCopyModel = ArgumentModel; inline void testname_copy(const Arguments& arg, std::string& name) { hipblasCopyModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_copy(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasCopyFn = FORTRAN ? hipblasCopy : hipblasCopy; int N = arg.N; int incx = arg.incx; int incy = arg.incy; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0) { CHECK_HIPBLAS_ERROR(hipblasCopyFn(handle, N, nullptr, incx, nullptr, incy)); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t sizeX = size_t(N) * abs_incx; size_t sizeY = size_t(N) * abs_incy; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy(sizeY); host_vector hx_cpu(sizeX); host_vector hy_cpu(sizeY); // allocate memory on device device_vector dx(sizeX); device_vector dy(sizeY); double hipblas_error = 0.0; double gpu_time_used = 0.0; // Initial Data on CPU hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan, false); hx_cpu = hx; hy_cpu = hy; CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasCopyFn(handle, N, dx, incx, dy, incy)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx.data(), dx, sizeof(T) * sizeX, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy.data(), dy, sizeof(T) * sizeY, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_copy(N, hx_cpu.data(), incx, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, abs_incy, hy_cpu.data(), hy.data()); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasCopyFn(handle, N, dx, incx, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasCopyModel{}.log_args(std::cout, arg, gpu_time_used, copy_gflop_count(N), copy_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_copy_batched.hpp000066400000000000000000000127031434647641600237020ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasCopyBatchedModel = ArgumentModel; inline void testname_copy_batched(const Arguments& arg, std::string& name) { hipblasCopyBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_copy_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasCopyBatchedFn = FORTRAN ? hipblasCopyBatched : hipblasCopyBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR( hipblasCopyBatchedFn(handle, N, nullptr, incx, nullptr, incy, batch_count)); return HIPBLAS_STATUS_SUCCESS; } int abs_incy = incy >= 0 ? incy : -incy; double hipblas_error = 0.0; double gpu_time_used = 0.0; // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_batch_vector hx_cpu(N, incx, batch_count); host_batch_vector hy_cpu(N, incy, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan, false); hx_cpu.copy_from(hx); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasCopyBatchedFn( handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hx.transfer_from(dx)); CHECK_HIP_ERROR(hy.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_copy(N, hx_cpu[b], incx, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy, batch_count); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasCopyBatchedFn( handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasCopyBatchedModel{}.log_args(std::cout, arg, gpu_time_used, 
copy_gflop_count(N), copy_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_copy_strided_batched.hpp000066400000000000000000000142231434647641600254170ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasCopyStridedBatchedModel = ArgumentModel; inline void testname_copy_strided_batched(const Arguments& arg, std::string& name) { hipblasCopyStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_copy_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasCopyStridedBatchedFn = FORTRAN ? hipblasCopyStridedBatched : hipblasCopyStridedBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stridex = size_t(N) * abs_incx * stride_scale; hipblasStride stridey = size_t(N) * abs_incy * stride_scale; size_t sizeX = stridex * batch_count; size_t sizeY = stridey * batch_count; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasCopyStridedBatchedFn( handle, N, nullptr, incx, stridex, nullptr, incy, stridey, batch_count)); return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy(sizeY); host_vector hx_cpu(sizeX); host_vector hy_cpu(sizeY); device_vector dx(sizeX); device_vector dy(sizeY); double gpu_time_used = 0.0; double hipblas_error = 0.0; // Initial Data on CPU hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hy, arg, N, abs_incy, stridey, batch_count, hipblas_client_alpha_sets_nan, false); hx_cpu = hx; hy_cpu = hy; CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasCopyStridedBatchedFn( handle, N, dx, incx, stridex, dy, incy, stridey, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx.data(), dx, sizeof(T) * sizeX, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy.data(), dy, sizeof(T) * sizeY, hipMemcpyDeviceToHost)); /*===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_copy(N, hx_cpu.data() + b * stridex, incx, hy_cpu.data() + b * stridey, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, stridey, hy_cpu.data(), hy.data()); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, N, abs_incy, stridey, hy_cpu, hy, batch_count); } } // end of if unit check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasCopyStridedBatchedFn( handle, N, dx, incx, stridex, dy, incy, stridey, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasCopyStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, copy_gflop_count(N), copy_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_dgmm.hpp000066400000000000000000000146011434647641600222010ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDgmmModel = ArgumentModel; inline void testname_dgmm(const Arguments& arg, std::string& name) { hipblasDgmmModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dgmm(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDgmmFn = FORTRAN ? hipblasDgmm : hipblasDgmm; hipblasSideMode_t side = char2hipblas_side(arg.side); int M = arg.M; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int ldc = arg.ldc; int abs_incx = incx >= 0 ? incx : -incx; size_t A_size = size_t(lda) * N; size_t C_size = size_t(ldc) * N; int k = (side == HIPBLAS_SIDE_RIGHT ? N : M); size_t X_size = size_t(abs_incx) * k; if(!X_size) X_size = 1; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || ldc < M || lda < M; if(invalid_size || !N || !M) { hipblasStatus_t actual = hipblasDgmmFn(handle, side, M, N, nullptr, lda, nullptr, incx, nullptr, ldc); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_copy(A_size); host_vector hx(X_size); host_vector hx_copy(X_size); host_vector hC(C_size); host_vector hC_1(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dC(C_size); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix(hA, arg, M, N, lda, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, k, abs_incx, 0, 1, hipblas_client_never_set_nan, false, true); hipblas_init_matrix(hC, arg, M, N, ldc, 0, 1, hipblas_client_never_set_nan); hA_copy = hA; hx_copy = hx; hC_1 = hC; hC_gold = hC; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC.data(), sizeof(T) * C_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasDgmmFn(handle, side, M, N, dA, lda, dx, incx, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_1.data(), dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ // reference calculation ptrdiff_t shift_x = incx < 0 ? 
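// negative increments are handled by starting from the end of the stored vector:
// the base offset below is |incx| * (N - 1), and indexing with i * incx then walks
// backwards through memory.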
-ptrdiff_t(incx) * (N - 1) : 0; for(size_t i1 = 0; i1 < M; i1++) { for(size_t i2 = 0; i2 < N; i2++) { if(HIPBLAS_SIDE_RIGHT == side) { hC_gold[i1 + i2 * ldc] = hA_copy[i1 + i2 * lda] * hx_copy[shift_x + i2 * incx]; } else { hC_gold[i1 + i2 * ldc] = hA_copy[i1 + i2 * lda] * hx_copy[shift_x + i1 * incx]; } } } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, ldc, hC_gold, hC_1); } if(arg.norm_check) { hipblas_error = norm_check_general('F', M, N, ldc, hC_gold, hC_1); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasDgmmFn(handle, side, M, N, dA, lda, dx, incx, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasDgmmModel{}.log_args(std::cout, arg, gpu_time_used, dgmm_gflop_count(M, N), dgmm_gbyte_count(M, N, k), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_dgmm_batched.hpp000066400000000000000000000175721434647641600236650ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDgmmBatchedModel = ArgumentModel; inline void testname_dgmm_batched(const Arguments& arg, std::string& name) { hipblasDgmmBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dgmm_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDgmmBatchedFn = FORTRAN ? hipblasDgmmBatched : hipblasDgmmBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); int M = arg.M; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int ldc = arg.ldc; int batch_count = arg.batch_count; size_t A_size = size_t(lda) * N; size_t C_size = size_t(ldc) * N; int k = (side == HIPBLAS_SIDE_RIGHT ? 
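// the scaling vector x has N elements when side is HIPBLAS_SIDE_RIGHT (C = A * diag(x),
// i.e. the columns of A are scaled) and M elements when side is HIPBLAS_SIDE_LEFT
// (C = diag(x) * A, i.e. the rows of A are scaled).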
N : M); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || ldc < M || lda < M || batch_count < 0; if(invalid_size || !N || !M || !batch_count) { hipblasStatus_t actual = hipblasDgmmBatchedFn( handle, side, M, N, nullptr, lda, nullptr, incx, nullptr, ldc, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_copy(A_size, 1, batch_count); host_batch_vector hx(k, incx, batch_count); host_batch_vector hx_copy(k, incx, batch_count); host_batch_vector hC(C_size, 1, batch_count); host_batch_vector hC_1(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(k, incx, batch_count); device_batch_vector dC(C_size, 1, batch_count); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_never_set_nan, false, true); hipblas_init_vector(hC, arg, hipblas_client_never_set_nan); hA_copy.copy_from(hA); hx_copy.copy_from(hx); hC_1.copy_from(hC); hC_gold.copy_from(hC_gold); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dC.transfer_from(hC)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasDgmmBatchedFn(handle, side, M, N, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_1.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ // reference calculation ptrdiff_t shift_x = incx < 0 ? 
-ptrdiff_t(incx) * (N - 1) : 0; for(int b = 0; b < batch_count; b++) { for(size_t i1 = 0; i1 < M; i1++) { for(size_t i2 = 0; i2 < N; i2++) { if(HIPBLAS_SIDE_RIGHT == side) { hC_gold[b][i1 + i2 * ldc] = hA_copy[b][i1 + i2 * lda] * hx_copy[b][shift_x + i2 * incx]; } else { hC_gold[b][i1 + i2 * ldc] = hA_copy[b][i1 + i2 * lda] * hx_copy[b][shift_x + i1 * incx]; } } } } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, hC_gold, hC_1); } if(arg.norm_check) { hipblas_error = norm_check_general('F', M, N, ldc, hC_gold, hC_1, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasDgmmBatchedFn(handle, side, M, N, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasDgmmBatchedModel{}.log_args(std::cout, arg, gpu_time_used, dgmm_gflop_count(M, N), dgmm_gbyte_count(M, N, k), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_dgmm_strided_batched.hpp000066400000000000000000000235611434647641600253760ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDgmmStridedBatchedModel = ArgumentModel; inline void testname_dgmm_strided_batched(const Arguments& arg, std::string& name) { hipblasDgmmStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dgmm_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDgmmStridedBatchedFn = FORTRAN ? 
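// reference result: apply the same diagonal scaling independently to every batch
// instance b, reading hA_copy[b] and hx_copy[b] and writing hC_gold[b].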
hipblasDgmmStridedBatched : hipblasDgmmStridedBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); int M = arg.M; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int ldc = arg.ldc; int batch_count = arg.batch_count; double stride_scale = arg.stride_scale; int k = (side == HIPBLAS_SIDE_RIGHT ? N : M); int abs_incx = incx >= 0 ? incx : -incx; hipblasStride stride_A = size_t(lda) * N * stride_scale; hipblasStride stride_x = size_t(abs_incx) * k * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; if(!stride_x) stride_x = 1; size_t A_size = size_t(stride_A) * batch_count; size_t C_size = size_t(stride_C) * batch_count; size_t X_size = size_t(stride_x) * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || ldc < M || lda < M || batch_count < 0; if(invalid_size || !N || !M || !batch_count) { hipblasStatus_t actual = hipblasDgmmStridedBatchedFn(handle, side, M, N, nullptr, lda, stride_A, nullptr, incx, stride_x, nullptr, ldc, stride_C, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_copy(A_size); host_vector hx(X_size); host_vector hx_copy(X_size); host_vector hC(C_size); host_vector hC_1(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dC(C_size); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix( hA, arg, M, N, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, k, abs_incx, stride_x, batch_count, hipblas_client_never_set_nan, false, true); hipblas_init_matrix(hC, arg, M, N, ldc, stride_C, batch_count, hipblas_client_never_set_nan); hA_copy = hA; hx_copy = hx; hC_1 = hC; hC_gold = hC; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC.data(), sizeof(T) * C_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasDgmmStridedBatchedFn(handle, side, M, N, dA, lda, stride_A, dx, incx, stride_x, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_1.data(), dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ // reference calculation ptrdiff_t shift_x = incx < 0 ? 
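// arg.fortran selects the Fortran-binding wrapper so the same test body exercises
// both the C and the Fortran entry points.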
-ptrdiff_t(incx) * (N - 1) : 0; for(int b = 0; b < batch_count; b++) { auto hC_goldb = hC_gold + b * stride_C; auto hA_copyb = hA_copy + b * stride_A; auto hx_copyb = hx_copy + b * stride_x; for(size_t i1 = 0; i1 < M; i1++) { for(size_t i2 = 0; i2 < N; i2++) { if(HIPBLAS_SIDE_RIGHT == side) { hC_goldb[i1 + i2 * ldc] = hA_copyb[i1 + i2 * lda] * hx_copyb[shift_x + i2 * incx]; } else { hC_goldb[i1 + i2 * ldc] = hA_copyb[i1 + i2 * lda] * hx_copyb[shift_x + i1 * incx]; } } } } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_1); } if(arg.norm_check) { hipblas_error = norm_check_general('F', M, N, ldc, stride_C, hC_gold, hC_1, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasDgmmStridedBatchedFn(handle, side, M, N, dA, lda, stride_A, dx, incx, stride_x, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasDgmmStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, dgmm_gflop_count(M, N), dgmm_gbyte_count(M, N, k), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_dot.hpp000066400000000000000000000154631434647641600220520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDotModel = ArgumentModel; inline void testname_dot(const Arguments& arg, std::string& name) { hipblasDotModel{}.test_name(arg, name); } inline void testname_dotc(const Arguments& arg, std::string& name) { hipblasDotModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dot(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDotFn = FORTRAN ? (CONJ ? hipblasDotc : hipblasDot) : (CONJ ? 
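// the CONJ template flag picks the conjugated variant (dotc) instead of dot, so this
// one template backs both testing_dot and the thin testing_dotc wrapper at the end of
// this file.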
hipblasDotc : hipblasDot); int N = arg.N; int incx = arg.incx; int incy = arg.incy; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0) { device_vector d_hipblas_result_0(1); host_vector h_hipblas_result_0(1); hipblas_init_nan(h_hipblas_result_0.data(), 1); CHECK_HIP_ERROR( hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasDotFn(handle, N, nullptr, incx, nullptr, incy, d_hipblas_result_0)); host_vector cpu_0(1); host_vector gpu_0(1); CHECK_HIP_ERROR(hipMemcpy(gpu_0, d_hipblas_result_0, sizeof(T), hipMemcpyDeviceToHost)); unit_check_general(1, 1, 1, cpu_0, gpu_0); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t sizeX = size_t(N) * abs_incx; size_t sizeY = size_t(N) * abs_incy; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy(sizeY); T cpu_result, h_hipblas_result_1, h_hipblas_result_2; device_vector dx(sizeX); device_vector dy(sizeY); device_vector d_hipblas_result(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, true, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan, false); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ // hipblasDot accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR((hipblasDotFn)(handle, N, dx, incx, dy, incy, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR((hipblasDotFn)(handle, N, dx, incx, dy, incy, &h_hipblas_result_1)); CHECK_HIP_ERROR( hipMemcpy(&h_hipblas_result_2, d_hipblas_result, sizeof(T), hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ (CONJ ? 
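// reference result: dot computes sum_i x[i] * y[i], while dotc conjugates the first
// operand and computes sum_i conj(x[i]) * y[i].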
cblas_dotc : cblas_dot)(N, hx.data(), incx, hy.data(), incy, &cpu_result); if(arg.unit_check) { unit_check_general(1, 1, 1, &cpu_result, &h_hipblas_result_1); unit_check_general(1, 1, 1, &cpu_result, &h_hipblas_result_2); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, 1, 1, &cpu_result, &h_hipblas_result_1); hipblas_error_device = norm_check_general('F', 1, 1, 1, &cpu_result, &h_hipblas_result_2); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasDotFn)(handle, N, dx, incx, dy, incy, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasDotModel{}.log_args(std::cout, arg, gpu_time_used, dot_gflop_count(N), dot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_dotc(const Arguments& arg) { return testing_dot(arg); } hipBLAS-rocm-5.5.1/clients/include/testing_dot_batched.hpp000066400000000000000000000212121434647641600235110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDotBatchedModel = ArgumentModel; inline void testname_dot_batched(const Arguments& arg, std::string& name) { hipblasDotBatchedModel{}.test_name(arg, name); } inline void testname_dotc_batched(const Arguments& arg, std::string& name) { hipblasDotBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dot_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDotBatchedFn = FORTRAN ? (CONJ ? hipblasDotcBatched : hipblasDotBatched) : (CONJ ? 
hipblasDotcBatched : hipblasDotBatched); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(batch_count, 1)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(T) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasDotBatchedFn( handle, N, nullptr, incx, nullptr, incy, batch_count, d_hipblas_result_0)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(T) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t sizeX = size_t(N) * abs_incx; size_t sizeY = size_t(N) * abs_incy; double gpu_time_used, hipblas_error_host, hipblas_error_device; // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_vector h_cpu_result(batch_count); host_vector h_hipblas_result1(batch_count); host_vector h_hipblas_result2(batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_hipblas_result(batch_count); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true, true); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan, false); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ // hipblasDot accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR((hipblasDotBatchedFn)(handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, batch_count, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR((hipblasDotBatchedFn)(handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, batch_count, h_hipblas_result1)); CHECK_HIP_ERROR(hipMemcpy( h_hipblas_result2, d_hipblas_result, sizeof(T) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { (CONJ ? 
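// one reference dot product per batch instance; the batch_count values collected in
// h_cpu_result are compared element-wise against the host- and device-pointer GPU
// results below.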
cblas_dotc : cblas_dot)(N, hx[b], incx, hy[b], incy, &(h_cpu_result[b])); } if(arg.unit_check) { unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result1); unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result2); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, batch_count, 1, h_cpu_result, h_hipblas_result1); hipblas_error_device = norm_check_general('F', 1, batch_count, 1, h_cpu_result, h_hipblas_result2); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasDotBatchedFn)(handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, batch_count, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasDotBatchedModel{}.log_args(std::cout, arg, gpu_time_used, dot_gflop_count(N), dot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_dotc_batched(const Arguments& arg) { return testing_dot_batched(arg); } hipBLAS-rocm-5.5.1/clients/include/testing_dot_batched_ex.hpp000066400000000000000000000417341434647641600242200ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDotBatchedExModel = ArgumentModel; inline void testname_dot_batched_ex(const Arguments& arg, std::string& name) { hipblasDotBatchedExModel{}.test_name(arg, name); } inline void testname_dotc_batched_ex(const Arguments& arg, std::string& name) { hipblasDotBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dot_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDotBatchedExFn = FORTRAN ? (CONJ ? hipblasDotcBatchedExFortran : hipblasDotBatchedExFortran) : (CONJ ? 
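// the _ex entry points take explicit hipblasDatatype_t arguments for x, y, the result
// and the execution (compute) precision, which is how the dispatchers below drive
// mixed-precision cases such as fp16 data with fp32 accumulation.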
hipblasDotcBatchedEx : hipblasDotBatchedEx); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; hipblasLocalHandle handle(arg); hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(batch_count, 1)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasDotBatchedExFn(handle, N, nullptr, xType, incx, nullptr, yType, incy, batch_count, d_hipblas_result_0, resultType, executionType)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_vector h_cpu_result(batch_count); host_vector h_hipblas_result_host(batch_count); host_vector h_hipblas_result_device(batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_hipblas_result(batch_count); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init(hy, true, false); hipblas_init_alternating_sign(hx); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasDotBatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, batch_count, h_hipblas_result_host, resultType, executionType)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasDotBatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, batch_count, d_hipblas_result, resultType, executionType)); CHECK_HIP_ERROR(hipMemcpy(h_hipblas_result_device, d_hipblas_result, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { (CONJ ? 
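// reference computed per batch with cblas; half-precision results are then compared
// with near_check_general using a tolerance that scales with N, since rounding error
// accumulates over the N-term reduction.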
cblas_dotc : cblas_dot)(N, hx[b], incx, hy[b], incy, &(h_cpu_result[b])); } if(arg.unit_check) { if(std::is_same{}) { double tol = error_tolerance * N; near_check_general(1, 1, batch_count, 1, 1, h_cpu_result.data(), h_hipblas_result_host.data(), tol); near_check_general(1, 1, batch_count, 1, 1, h_cpu_result.data(), h_hipblas_result_device.data(), tol); } else { unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result_host); unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result_device); } } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, batch_count, 1, h_cpu_result, h_hipblas_result_host); hipblas_error_device = norm_check_general( 'F', 1, batch_count, 1, h_cpu_result, h_hipblas_result_device); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasDotBatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, batch_count, d_hipblas_result, resultType, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasDotBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, dot_gflop_count(N), dot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_dot_batched_ex(const Arguments& arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_dot_batched_ex_template( arg); } else if(xType == HIPBLAS_R_16B && yType == HIPBLAS_R_16B && resultType == HIPBLAS_R_16B && executionType == HIPBLAS_R_32F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && resultType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && resultType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_dot_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } inline hipblasStatus_t testing_dotc_batched_ex(const Arguments& arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = 
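// each supported (a_type, b_type, c_type, compute_type) combination maps to a concrete
// template instantiation; any other combination falls through to
// HIPBLAS_STATUS_NOT_SUPPORTED.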
testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_dot_batched_ex_template( arg); } else if(xType == HIPBLAS_R_16B && yType == HIPBLAS_R_16B && resultType == HIPBLAS_R_16B && executionType == HIPBLAS_R_32F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && resultType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_dot_batched_ex_template(arg); } else if(xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && resultType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_dot_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_dot_ex.hpp000066400000000000000000000347221434647641600225450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDotExModel = ArgumentModel; inline void testname_dot_ex(const Arguments& arg, std::string& name) { hipblasDotExModel{}.test_name(arg, name); } inline void testname_dotc_ex(const Arguments& arg, std::string& name) { hipblasDotExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dot_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDotExFn = FORTRAN ? (CONJ ? hipblasDotcExFortran : hipblasDotExFortran) : (CONJ ? 
hipblasDotcEx : hipblasDotEx); int N = arg.N; int incx = arg.incx; int incy = arg.incy; hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0) { device_vector d_hipblas_result_0(1); host_vector h_hipblas_result_0(1); hipblas_init_nan(h_hipblas_result_0.data(), 1); CHECK_HIP_ERROR( hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasDotExFn(handle, N, nullptr, xType, incx, nullptr, yType, incy, d_hipblas_result_0, resultType, executionType)); host_vector cpu_0(1); host_vector gpu_0(1); CHECK_HIP_ERROR(hipMemcpy(gpu_0, d_hipblas_result_0, sizeof(Tr), hipMemcpyDeviceToHost)); unit_check_general(1, 1, 1, cpu_0, gpu_0); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t sizeX = size_t(N) * abs_incx; size_t sizeY = size_t(N) * abs_incy; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy(sizeY); device_vector dx(sizeX); device_vector dy(sizeY); device_vector d_hipblas_result(1); Tr cpu_result, hipblas_result_host, hipblas_result_device; double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, true, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan, false); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(Ty) * sizeY, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasDotExFn(handle, N, dx, xType, incx, dy, yType, incy, &hipblas_result_host, resultType, executionType)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasDotExFn(handle, N, dx, xType, incx, dy, yType, incy, d_hipblas_result, resultType, executionType)); CHECK_HIP_ERROR( hipMemcpy(&hipblas_result_device, d_hipblas_result, sizeof(Tr), hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ (CONJ ? 
cblas_dotc : cblas_dot)(N, hx.data(), incx, hy.data(), incy, &cpu_result); if(arg.unit_check) { if(std::is_same{}) { double tol = error_tolerance * N; near_check_general(1, 1, 1, &cpu_result, &hipblas_result_host, tol); near_check_general(1, 1, 1, &cpu_result, &hipblas_result_device, tol); } else { unit_check_general(1, 1, 1, &cpu_result, &hipblas_result_host); unit_check_general(1, 1, 1, &cpu_result, &hipblas_result_device); } } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, 1, 1, &cpu_result, &hipblas_result_host); hipblas_error_device = norm_check_general('F', 1, 1, 1, &cpu_result, &hipblas_result_device); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasDotExFn(handle, N, dx, xType, incx, dy, yType, incy, d_hipblas_result, resultType, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasDotExModel{}.log_args(std::cout, arg, gpu_time_used, dot_gflop_count(N), dot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_dot_ex(const Arguments& arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_dot_ex_template( arg); } else if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_R_16B && yType == HIPBLAS_R_16B && resultType == HIPBLAS_R_16B && executionType == HIPBLAS_R_32F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && resultType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && resultType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_dot_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } inline hipblasStatus_t testing_dotc_ex(const Arguments& arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_dot_ex_template( arg); } else if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_R_16B && yType == HIPBLAS_R_16B && resultType == 
HIPBLAS_R_16B && executionType == HIPBLAS_R_32F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && resultType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_dot_ex_template(arg); } else if(xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && resultType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_dot_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_dot_strided_batched.hpp000066400000000000000000000246541434647641600252440ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDotStridedBatchedModel = ArgumentModel; inline void testname_dot_strided_batched(const Arguments& arg, std::string& name) { hipblasDotStridedBatchedModel{}.test_name(arg, name); } inline void testname_dotc_strided_batched(const Arguments& arg, std::string& name) { hipblasDotStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dot_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDotStridedBatchedFn = FORTRAN ? (CONJ ? hipblasDotcStridedBatched : hipblasDotStridedBatched) : (CONJ ? hipblasDotcStridedBatched : hipblasDotStridedBatched); int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; hipblasStride stridex = size_t(N) * abs_incx * stride_scale; hipblasStride stridey = size_t(N) * abs_incy * stride_scale; size_t sizeX = stridex * batch_count; size_t sizeY = stridey * batch_count; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(batch_count, 1)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(T) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasDotStridedBatchedFn(handle, N, nullptr, incx, stridex, nullptr, incy, stridey, batch_count, d_hipblas_result_0)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(T) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy(sizeY); host_vector h_hipblas_result1(batch_count); host_vector h_hipblas_result2(batch_count); host_vector h_cpu_result(batch_count); device_vector dx(sizeX); device_vector dy(sizeY); device_vector d_hipblas_result(batch_count); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true, true); hipblas_init_vector( hy, arg, N, abs_incy, stridey, batch_count, hipblas_client_alpha_sets_nan, false); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ // hipblasDot accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR((hipblasDotStridedBatchedFn)(handle, N, dx, incx, stridex, dy, incy, stridey, batch_count, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR((hipblasDotStridedBatchedFn)(handle, N, dx, incx, stridex, dy, incy, stridey, batch_count, h_hipblas_result1)); CHECK_HIP_ERROR(hipMemcpy( h_hipblas_result2, d_hipblas_result, sizeof(T) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { (CONJ ? 
cblas_dotc : cblas_dot)(N, hx.data() + b * stridex, incx, hy.data() + b * stridey, incy, &h_cpu_result[b]); } if(arg.unit_check) { unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result1); unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result2); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, batch_count, 1, h_cpu_result, h_hipblas_result1); hipblas_error_device = norm_check_general('F', 1, batch_count, 1, h_cpu_result, h_hipblas_result2); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasDotStridedBatchedFn)(handle, N, dx, incx, stridex, dy, incy, stridey, batch_count, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasDotStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, dot_gflop_count(N), dot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_dotc_strided_batched(const Arguments& arg) { return testing_dot_strided_batched(arg); } hipBLAS-rocm-5.5.1/clients/include/testing_dot_strided_batched_ex.hpp000066400000000000000000000466501434647641600257400ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasDotStridedBatchedExModel = ArgumentModel; inline void testname_dot_strided_batched_ex(const Arguments& arg, std::string& name) { hipblasDotStridedBatchedExModel{}.test_name(arg, name); } inline void testname_dotc_strided_batched_ex(const Arguments& arg, std::string& name) { hipblasDotStridedBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_dot_strided_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasDotStridedBatchedExFn = FORTRAN ? (CONJ ? 
hipblasDotcStridedBatchedExFortran : hipblasDotStridedBatchedExFortran) : (CONJ ? hipblasDotcStridedBatchedEx : hipblasDotStridedBatchedEx); int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stridex = size_t(N) * abs_incx * stride_scale; hipblasStride stridey = size_t(N) * abs_incy * stride_scale; size_t sizeX = stridex * batch_count; size_t sizeY = stridey * batch_count; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; hipblasLocalHandle handle(arg); hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(batch_count, 1)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasDotStridedBatchedExFn(handle, N, nullptr, xType, incx, stridex, nullptr, yType, incy, stridey, batch_count, d_hipblas_result_0, resultType, executionType)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy(sizeY); host_vector h_hipblas_result_host(batch_count); host_vector h_hipblas_result_device(batch_count); host_vector h_cpu_result(batch_count); device_vector dx(sizeX); device_vector dy(sizeY); device_vector d_hipblas_result(batch_count); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true, true); hipblas_init_vector( hy, arg, N, abs_incy, stridey, batch_count, hipblas_client_alpha_sets_nan, false); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(Ty) * sizeY, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasDotStridedBatchedExFn(handle, N, dx, xType, incx, stridex, dy, yType, incy, stridey, batch_count, h_hipblas_result_host, resultType, executionType)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasDotStridedBatchedExFn(handle, N, dx, xType, incx, stridex, dy, yType, incy, stridey, batch_count, d_hipblas_result, resultType, executionType)); CHECK_HIP_ERROR(hipMemcpy(h_hipblas_result_device, d_hipblas_result, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { (CONJ ? 
cblas_dotc : cblas_dot)(N, hx.data() + b * stridex, incx, hy.data() + b * stridey, incy, &h_cpu_result[b]); } if(arg.unit_check) { if(std::is_same{}) { double tol = error_tolerance * N; near_check_general(1, 1, batch_count, 1, 1, h_cpu_result.data(), h_hipblas_result_host.data(), tol); near_check_general(1, 1, batch_count, 1, 1, h_cpu_result.data(), h_hipblas_result_device.data(), tol); } else { unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result_host); unit_check_general(1, batch_count, 1, h_cpu_result, h_hipblas_result_device); } } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, batch_count, 1, h_cpu_result, h_hipblas_result_host); hipblas_error_device = norm_check_general( 'F', 1, batch_count, 1, h_cpu_result, h_hipblas_result_device); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasDotStridedBatchedExFn(handle, N, dx, xType, incx, stridex, dy, yType, incy, stridey, batch_count, d_hipblas_result, resultType, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasDotStridedBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, dot_gflop_count(N), dot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_dot_strided_batched_ex(const Arguments& arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_16B && yType == HIPBLAS_R_16B && resultType == HIPBLAS_R_16B && executionType == HIPBLAS_R_32F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && resultType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && resultType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_dot_strided_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } inline hipblasStatus_t testing_dotc_strided_batched_ex(const Arguments& arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t resultType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && 
resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_16F && yType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_16B && yType == HIPBLAS_R_16B && resultType == HIPBLAS_R_16B && executionType == HIPBLAS_R_32F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_32F && yType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_64F && yType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_C_32F && yType == HIPBLAS_C_32F && resultType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_dot_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_C_64F && yType == HIPBLAS_C_64F && resultType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_dot_strided_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_exceptions.hpp000066400000000000000000000035311434647641600234360ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "testing_common.hpp" inline hipblasStatus_t testing_bad_operation() { Arguments arg; hipblasLocalHandle handle(arg); // invalid transpose operation enum return hipblasSgemv( handle, hipblasOperation_t(0), 0, 0, nullptr, nullptr, 0, nullptr, 0, nullptr, nullptr, 0); } inline hipblasStatus_t testing_handle() { // Test out hipblasCreate() and hipblasDestroy() hipblasHandle_t handle; CHECK_HIPBLAS_ERROR(hipblasCreate(&handle)); CHECK_HIPBLAS_ERROR(hipblasDestroy(handle)); return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gbmv.hpp000066400000000000000000000207311434647641600222110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGbmvModel = ArgumentModel; inline void testname_gbmv(const Arguments& arg, std::string& name) { hipblasGbmvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gbmv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGbmvFn = FORTRAN ? hipblasGbmv : hipblasGbmv; int M = arg.M; int N = arg.N; int KL = arg.KL; int KU = arg.KU; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; size_t A_size = size_t(lda) * N; int dim_x; int dim_y; hipblasOperation_t transA = char2hipblas_operation(arg.transA); if(transA == HIPBLAS_OP_N) { dim_x = N; dim_y = M; } else { dim_x = M; dim_y = N; } hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || lda < KL + KU + 1 || !incx || !incy || KL < 0 || KU < 0; if(invalid_size || !M || !N) { hipblasStatus_t actual = hipblasGbmvFn(handle, transA, M, N, KL, KU, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t X_size = size_t(dim_x) * abs_incx; size_t Y_size = size_t(dim_y) * abs_incy; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); host_vector hy_cpu(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix(hA, arg, lda, N, lda, 0, 1, hipblas_client_alpha_sets_nan, true, false); hipblas_init_vector(hx, arg, dim_x, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, dim_y, abs_incy, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hy_cpu = hy: save a copy in hy_cpu which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGbmvFn( handle, transA, M, N, KL, KU, (T*)&h_alpha, dA, lda, dx, incx, (T*)&h_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGbmvFn( handle, transA, M, N, KL, KU, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_gbmv(transA, M, N, KL, KU, h_alpha, hA.data(), lda, hx.data(), incx, h_beta, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, dim_y, abs_incy, hy_cpu, hy_host); unit_check_general(1, dim_y, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, dim_y, abs_incy, hy_cpu.data(), hy_host.data()); hipblas_error_device = norm_check_general('F', 1, dim_y, abs_incy, hy_cpu.data(), hy_device.data()); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGbmvFn( handle, transA, M, N, KL, KU, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGbmvModel{}.log_args(std::cout, arg, gpu_time_used, 
gbmv_gflop_count(transA, M, N, KL, KU), gbmv_gbyte_count(transA, M, N, KL, KU), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gbmv_batched.hpp000066400000000000000000000252561434647641600236720ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGbmvBatchedModel = ArgumentModel; inline void testname_gbmv_batched(const Arguments& arg, std::string& name) { hipblasGbmvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gbmv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGbmvBatchedFn = FORTRAN ? hipblasGbmvBatched : hipblasGbmvBatched; int M = arg.M; int N = arg.N; int KL = arg.KL; int KU = arg.KU; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; size_t A_size = size_t(lda) * N; int dim_x; int dim_y; hipblasOperation_t transA = char2hipblas_operation(arg.transA); if(transA == HIPBLAS_OP_N) { dim_x = N; dim_y = M; } else { dim_x = M; dim_y = N; } hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || lda < KL + KU + 1 || !incx || !incy || batch_count < 0 || KL < 0 || KU < 0; if(invalid_size || !M || !N || !batch_count) { hipblasStatus_t actual = hipblasGbmvBatchedFn(handle, transA, M, N, KL, KU, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(dim_x, incx, batch_count); host_batch_vector hy(dim_y, incy, batch_count); host_batch_vector hy_host(dim_y, incy, batch_count); host_batch_vector hy_device(dim_y, incy, batch_count); host_batch_vector hy_cpu(dim_y, incy, batch_count); // arrays of pointers-to-device on host device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(dim_x, incx, batch_count); device_batch_vector dy(dim_y, incy, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_beta_sets_nan); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGbmvBatchedFn(handle, transA, M, N, KL, KU, (T*)&h_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, (T*)&h_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGbmvBatchedFn(handle, transA, M, N, KL, KU, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_gbmv( transA, M, N, KL, KU, h_alpha, hA[b], lda, hx[b], incx, h_beta, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, dim_y, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, dim_y, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, dim_y, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, dim_y, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dy.transfer_from(hy)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGbmvBatchedFn(handle, transA, M, N, KL, KU, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used 
= get_time_us_sync(stream) - gpu_time_used; hipblasGbmvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, gbmv_gflop_count(transA, M, N, KL, KU), gbmv_gbyte_count(transA, M, N, KL, KU), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gbmv_strided_batched.hpp000066400000000000000000000331111434647641600253750ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGbmvStridedBatchedModel = ArgumentModel; inline void testname_gbmv_strided_batched(const Arguments& arg, std::string& name) { hipblasGbmvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gbmv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGbmvStridedBatchedFn = FORTRAN ? hipblasGbmvStridedBatched : hipblasGbmvStridedBatched; int M = arg.M; int N = arg.N; int KL = arg.KL; int KU = arg.KU; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride stride_A = size_t(lda) * N * stride_scale; hipblasStride stride_x; hipblasStride stride_y; size_t A_size = stride_A * batch_count; int dim_x; int dim_y; hipblasOperation_t transA = char2hipblas_operation(arg.transA); if(transA == HIPBLAS_OP_N) { dim_x = N; dim_y = M; } else { dim_x = M; dim_y = N; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; stride_x = size_t(dim_x) * abs_incx * stride_scale; stride_y = size_t(dim_y) * abs_incy * stride_scale; size_t X_size = stride_x * batch_count; size_t Y_size = stride_y * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || lda < KL + KU + 1 || !incx || !incy || KL < 0 || KU < 0 || batch_count < 0; if(invalid_size || !M || !N || !batch_count) { hipblasStatus_t actual = hipblasGbmvStridedBatchedFn(handle, transA, M, N, KL, KU, nullptr, nullptr, lda, stride_A, nullptr, incx, stride_x, nullptr, nullptr, incy, stride_y, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); host_vector hy_cpu(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix( hA, arg, M, N, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, dim_x, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector( hy, arg, dim_y, abs_incy, stride_y, batch_count, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGbmvStridedBatchedFn(handle, transA, M, N, KL, KU, (T*)&h_alpha, dA, lda, stride_A, dx, incx, stride_x, (T*)&h_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGbmvStridedBatchedFn(handle, transA, M, N, KL, KU, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_gbmv(transA, M, N, KL, KU, h_alpha, hA.data() + b * stride_A, lda, hx.data() + b * stride_x, incx, h_beta, hy_cpu.data() + b * stride_y, incy); } // enable unit check, notice unit check is not invasive, but 
norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, dim_y, batch_count, abs_incy, stride_y, hy_cpu, hy_host); unit_check_general(1, dim_y, batch_count, abs_incy, stride_y, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, dim_y, abs_incy, stride_y, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, dim_y, abs_incy, stride_y, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGbmvStridedBatchedFn(handle, transA, M, N, KL, KU, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGbmvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, gbmv_gflop_count(transA, M, N, KL, KU), gbmv_gbyte_count(transA, M, N, KL, KU), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_geam.hpp000066400000000000000000000166311434647641600221730ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include #include #include "hipblas_unique_ptr.hpp" #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGeamModel = ArgumentModel; inline void testname_geam(const Arguments& arg, std::string& name) { hipblasGeamModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_geam(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGeamFn = FORTRAN ? 
hipblasGeam : hipblasGeam; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); int A_row, A_col, B_row, B_col; double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); if(transA == HIPBLAS_OP_N) { A_row = M; A_col = N; } else { A_row = N; A_col = M; } if(transB == HIPBLAS_OP_N) { B_row = M; B_col = N; } else { B_row = N; B_col = M; } size_t A_size = size_t(lda) * A_col; size_t B_size = size_t(ldb) * B_col; size_t C_size = size_t(ldc) * N; // check here to prevent undefined memory allocation error if(M <= 0 || N <= 0 || lda < A_row || ldb < B_row || ldc < M) { return HIPBLAS_STATUS_INVALID_VALUE; } // allocate memory on device device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC1(C_size); host_vector hC2(C_size); host_vector hC_copy(C_size); // Initial Data on CPU hipblas_init_matrix(hA, arg, A_row, A_col, lda, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix(hB, arg, B_row, B_col, ldb, 0, 1, hipblas_client_beta_sets_nan); hipblas_init_matrix(hC1, arg, M, N, ldc, 0, 1, hipblas_client_beta_sets_nan); hC2 = hC1; hC_copy = hC1; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB.data(), sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC1.data(), sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ { // &h_alpha and &h_beta are host pointers CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGeamFn( handle, transA, transB, M, N, &h_alpha, dA, lda, &h_beta, dB, ldb, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC1.data(), dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); } { CHECK_HIP_ERROR(hipMemcpy(dC, hC2.data(), sizeof(T) * C_size, hipMemcpyHostToDevice)); // d_alpha and d_beta are device pointers CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGeamFn( handle, transA, transB, M, N, d_alpha, dA, lda, d_beta, dB, ldb, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC2.data(), dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); } /* ===================================================================== CPU BLAS =================================================================== */ cblas_geam( transA, transB, M, N, &h_alpha, (T*)hA, lda, &h_beta, (T*)hB, ldb, (T*)hC_copy, ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, ldc, hC_copy.data(), hC1.data()); unit_check_general(M, N, ldc, hC_copy.data(), hC2.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, hC_copy.data(), hC1.data()); 
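// Descriptive note (added comment, same convention as the surrounding checks): as with the
// host-pointer result above, the next call measures the device-pointer result (hC2, computed
// under HIPBLAS_POINTER_MODE_DEVICE) against the CBLAS reference in hC_copy; the 'F' argument
// to norm_check_general selects a Frobenius-norm-based error measure.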
hipblas_error_device = norm_check_general('F', M, N, ldc, hC_copy.data(), hC2.data()); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGeamFn( handle, transA, transB, M, N, d_alpha, dA, lda, d_beta, dB, ldb, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasGeamModel{}.log_args(std::cout, arg, gpu_time_used, geam_gflop_count(M, N), geam_gbyte_count(M, N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_geam_batched.hpp000066400000000000000000000247631434647641600236520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include #include #include "hipblas_unique_ptr.hpp" #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGeamBatchedModel = ArgumentModel; inline void testname_geam_batched(const Arguments& arg, std::string& name) { hipblasGeamBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_geam_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGeamBatchedFn = FORTRAN ? 
hipblasGeamBatched : hipblasGeamBatched; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); int A_row, A_col, B_row, B_col; if(transA == HIPBLAS_OP_N) { A_row = M; A_col = N; } else { A_row = N; A_col = M; } if(transB == HIPBLAS_OP_N) { B_row = M; B_col = N; } else { B_row = N; B_col = M; } size_t A_size = size_t(lda) * A_col; size_t B_size = size_t(ldb) * B_col; size_t C_size = size_t(ldc) * N; // check here to prevent undefined memory allocation error if(M <= 0 || N <= 0 || lda < A_row || ldb < B_row || ldc < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // allocate memory on device device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hC1(C_size, 1, batch_count); host_batch_vector hC2(C_size, 1, batch_count); host_batch_vector hC_copy(C_size, 1, batch_count); hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hB, arg, hipblas_client_beta_sets_nan); hipblas_init_vector(hC1, arg, hipblas_client_beta_sets_nan); hC2.copy_from(hC1); hC_copy.copy_from(hC1); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dC.transfer_from(hC1)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.norm_check || arg.unit_check) { /* ===================================================================== HIPBLAS =================================================================== */ { // &h_alpha and &h_beta are host pointers CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGeamBatchedFn(handle, transA, transB, M, N, &h_alpha, dA.ptr_on_device(), lda, &h_beta, dB.ptr_on_device(), ldb, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC1.transfer_from(dC)); } { CHECK_HIP_ERROR(dC.transfer_from(hC2)); // d_alpha and d_beta are device pointers CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGeamBatchedFn(handle, transA, transB, M, N, d_alpha, dA.ptr_on_device(), lda, d_beta, dB.ptr_on_device(), ldb, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC2.transfer_from(dC)); } /* ===================================================================== CPU BLAS =================================================================== */ // reference calculation for(int b = 0; b < batch_count; b++) { cblas_geam(transA, transB, M, N, &h_alpha, (T*)hA[b], lda, &h_beta, (T*)hB[b], ldb, (T*)hC_copy[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, 
batch_count, ldc, hC_copy, hC1); unit_check_general(M, N, batch_count, ldc, hC_copy, hC2); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, hC_copy, hC1, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldc, hC_copy, hC2, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGeamBatchedFn(handle, transA, transB, M, N, d_alpha, dA.ptr_on_device(), lda, d_beta, dB.ptr_on_device(), ldb, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasGeamBatchedModel{}.log_args(std::cout, arg, gpu_time_used, geam_gflop_count(M, N), geam_gbyte_count(M, N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_geam_strided_batched.hpp000066400000000000000000000300661434647641600253610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include #include #include "hipblas_unique_ptr.hpp" #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGeamStridedBatchedModel = ArgumentModel; inline void testname_geam_strided_batched(const Arguments& arg, std::string& name) { hipblasGeamStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_geam_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGeamStridedBatchedFn = FORTRAN ? 
hipblasGeamStridedBatched : hipblasGeamStridedBatched; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); int A_row, A_col, B_row, B_col; hipblasStride stride_A, stride_B, stride_C; if(transA == HIPBLAS_OP_N) { A_row = M; A_col = N; } else { A_row = N; A_col = M; } if(transB == HIPBLAS_OP_N) { B_row = M; B_col = N; } else { B_row = N; B_col = M; } stride_A = size_t(lda) * A_col * stride_scale; stride_B = size_t(ldb) * B_col * stride_scale; stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t B_size = stride_B * batch_count; size_t C_size = stride_C * batch_count; // check here to prevent undefined memory allocation error if(M <= 0 || N <= 0 || lda < A_row || ldb < B_row || ldc < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // allocate memory on device device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); if(!dA || !dB || !dC || !d_alpha || !d_beta) { return HIPBLAS_STATUS_ALLOC_FAILED; } // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC1(C_size); host_vector hC2(C_size); host_vector hC_copy(C_size); // Initial Data on CPU hipblas_init_matrix( hA, arg, A_row, A_col, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hB, arg, B_row, B_col, ldb, stride_B, batch_count, hipblas_client_beta_sets_nan); hipblas_init_matrix(hC1, arg, M, N, ldc, stride_C, batch_count, hipblas_client_beta_sets_nan); hC2 = hC1; hC_copy = hC1; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB.data(), sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC1.data(), sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.norm_check || arg.unit_check) { /* ===================================================================== HIPBLAS =================================================================== */ { // &h_alpha and &h_beta are host pointers CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGeamStridedBatchedFn(handle, transA, transB, M, N, &h_alpha, dA, lda, stride_A, &h_beta, dB, ldb, stride_B, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC1.data(), dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); } { CHECK_HIP_ERROR(hipMemcpy(dC, hC2.data(), sizeof(T) * C_size, hipMemcpyHostToDevice)); // d_alpha and d_beta are device pointers CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGeamStridedBatchedFn(handle, transA, transB, M, N, d_alpha, dA, lda, stride_A, d_beta, dB, ldb, stride_B, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC2.data(), dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); } /* ===================================================================== CPU BLAS 
=================================================================== */ // reference calculation for(int b = 0; b < batch_count; b++) { cblas_geam(transA, transB, M, N, &h_alpha, (T*)hA + b * stride_A, lda, &h_beta, (T*)hB + b * stride_B, ldb, (T*)hC_copy + b * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, stride_C, hC_copy, hC1); unit_check_general(M, N, batch_count, ldc, stride_C, hC_copy, hC2); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, stride_C, hC_copy, hC1, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldc, stride_C, hC_copy, hC2, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGeamStridedBatchedFn(handle, transA, transB, M, N, d_alpha, dA, lda, stride_A, d_beta, dB, ldb, stride_B, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasGeamStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, geam_gflop_count(M, N), geam_gbyte_count(M, N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gels.hpp000066400000000000000000000226011434647641600222060ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "gtest/gtest.h" #include #include #include #include #include "testing_common.hpp" using hipblasGelsModel = ArgumentModel; inline void testname_gels(const Arguments& arg, std::string& name) { hipblasGelsModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gels_bad_arg(const Arguments& arg) { auto hipblasGelsFn = arg.fortran ? 
hipblasGels : hipblasGels; hipblasLocalHandle handle(arg); const int M = 100; const int N = 101; const int nrhs = 10; const int lda = 102; const int ldb = 103; const hipblasOperation_t opN = HIPBLAS_OP_N; const hipblasOperation_t opBad = is_complex ? HIPBLAS_OP_T : HIPBLAS_OP_C; const size_t A_size = size_t(lda) * N; const size_t B_size = size_t(ldb) * nrhs; device_vector dA(A_size); device_vector dB(B_size); device_vector dInfo(1); int info = 0; EXPECT_HIPBLAS_STATUS(hipblasGelsFn(handle, opN, M, N, nrhs, dA, lda, dB, ldb, nullptr, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS(hipblasGelsFn(handle, opBad, M, N, nrhs, dA, lda, dB, ldb, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-1, info); EXPECT_HIPBLAS_STATUS(hipblasGelsFn(handle, opN, -1, N, nrhs, dA, lda, dB, ldb, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS(hipblasGelsFn(handle, opN, M, -1, nrhs, dA, lda, dB, ldb, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS(hipblasGelsFn(handle, opN, M, N, -1, dA, lda, dB, ldb, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS( hipblasGelsFn(handle, opN, M, N, nrhs, nullptr, lda, dB, ldb, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-5, info); EXPECT_HIPBLAS_STATUS(hipblasGelsFn(handle, opN, M, N, nrhs, dA, M - 1, dB, ldb, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-6, info); EXPECT_HIPBLAS_STATUS( hipblasGelsFn(handle, opN, M, N, nrhs, dA, lda, nullptr, ldb, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-7, info); // Explicit values to check for ldb < M and ldb < N EXPECT_HIPBLAS_STATUS( hipblasGelsFn(handle, opN, 100, 200, nrhs, dA, lda, dB, 199, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-8, info); EXPECT_HIPBLAS_STATUS( hipblasGelsFn(handle, opN, 200, 100, nrhs, dA, 201, dB, 199, &info, dInfo), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-8, info); EXPECT_HIPBLAS_STATUS(hipblasGelsFn(handle, opN, M, N, nrhs, dA, lda, dB, ldb, &info, nullptr), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-10, info); // If M == 0 || N == 0, A can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGelsFn(handle, opN, 0, N, nrhs, nullptr, lda, dB, ldb, &info, dInfo), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); EXPECT_HIPBLAS_STATUS( hipblasGelsFn(handle, opN, M, 0, nrhs, nullptr, lda, dB, ldb, &info, dInfo), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // If nrhs == 0, B can be nullptr EXPECT_HIPBLAS_STATUS(hipblasGelsFn(handle, opN, M, N, 0, dA, lda, nullptr, ldb, &info, dInfo), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // If M == 0 && N == 0, B can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGelsFn(handle, opN, 0, 0, nrhs, nullptr, lda, nullptr, ldb, &info, dInfo), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_gels(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGelsFn = FORTRAN ? hipblasGels : hipblasGels; char transc = arg.transA; int N = arg.N; int M = arg.M; int nrhs = arg.K; int lda = arg.lda; int ldb = arg.ldb; if(is_complex && transc == 'T') transc = 'C'; else if(!is_complex && transc == 'C') transc = 'T'; hipblasOperation_t trans = char2hipblas_operation(transc); size_t A_size = size_t(lda) * N; size_t B_size = size_t(ldb) * nrhs; // Check to prevent memory allocation error if(M < 0 || N < 0 || nrhs < 0 || lda < M || ldb < M || ldb < N) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hB_res(B_size); int info, info_res; int info_input(-1); device_vector dA(A_size); device_vector dB(B_size); device_vector dInfo(1); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA, hB, hX on CPU srand(1); hipblas_init(hA, true); hipblas_init(hB); hB_res = hB; // scale A to avoid singularities for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[i + j * lda] += 400; else hA[i + j * lda] -= 4; } } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, B_size * sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR( hipblasGelsFn(handle, trans, M, N, nrhs, dA, lda, dB, ldb, &info_input, dInfo)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hB_res, dB, B_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(&info_res, dInfo, sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ int sizeW = std::max(1, std::min(M, N) + std::max(std::min(M, N), nrhs)); host_vector hW(sizeW); info = cblas_gels(transc, M, N, nrhs, hA.data(), lda, hB.data(), ldb, hW.data(), sizeW); hipblas_error = norm_check_general('F', std::max(M, N), nrhs, ldb, hB.data(), hB_res.data()); if(info != info_res) hipblas_error += 1.0; if(info_input != 0) hipblas_error += 1.0; if(arg.unit_check) { double eps = std::numeric_limits::epsilon(); double tolerance = N * eps * 100; int zero = 0; unit_check_error(hipblas_error, tolerance); unit_check_general(1, 1, 1, &zero, &info_input); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasGelsFn(handle, trans, M, N, nrhs, dA, lda, dB, ldb, &info_input, dInfo)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGelsModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gels_batched.hpp000066400000000000000000000302521434647641600236610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "gtest/gtest.h" #include #include #include #include #include "testing_common.hpp" using hipblasGelsBatchedModel = ArgumentModel; inline void testname_gels_batched(const Arguments& arg, std::string& name) { hipblasGelsBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gels_batched_bad_arg(const Arguments& arg) { auto hipblasGelsBatchedFn = arg.fortran ? hipblasGelsBatched : hipblasGelsBatched; hipblasLocalHandle handle(arg); const int M = 100; const int N = 101; const int nrhs = 10; const int lda = 102; const int ldb = 103; const int batchCount = 2; const hipblasOperation_t opN = HIPBLAS_OP_N; const hipblasOperation_t opBad = is_complex ? HIPBLAS_OP_T : HIPBLAS_OP_C; const size_t A_size = size_t(lda) * N; const size_t B_size = size_t(ldb) * nrhs; device_batch_vector dA(A_size, 1, batchCount); device_batch_vector dB(B_size, 1, batchCount); device_vector dInfo(batchCount); int info = 0; T* const* dAp = dA.ptr_on_device(); T* const* dBp = dB.ptr_on_device(); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, M, N, nrhs, dAp, lda, dBp, ldb, nullptr, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opBad, M, N, nrhs, dAp, lda, dBp, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-1, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, -1, N, nrhs, dAp, lda, dBp, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, M, -1, nrhs, dAp, lda, dBp, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn(handle, opN, M, N, -1, dAp, lda, dBp, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, M, N, nrhs, nullptr, lda, dBp, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-5, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, M, N, nrhs, dAp, M - 1, dBp, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-6, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, M, N, nrhs, dAp, lda, nullptr, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-7, info); // Explicit values to check for ldb < M and ldb < N EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, 100, 200, nrhs, dAp, lda, dBp, 199, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-8, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, 200, 100, nrhs, dAp, 201, dBp, 199, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-8, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, M, N, nrhs, dAp, lda, dBp, ldb, &info, nullptr, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-10, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn(handle, opN, M, N, nrhs, dAp, 
lda, dBp, ldb, &info, dInfo, -1), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-11, info); // If M == 0 || N == 0, A can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, 0, N, nrhs, nullptr, lda, dBp, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, M, 0, nrhs, nullptr, lda, dBp, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // If nrhs == 0, B can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, M, N, 0, dAp, lda, nullptr, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // If M == 0 && N == 0, B can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn( handle, opN, 0, 0, nrhs, nullptr, lda, nullptr, ldb, &info, dInfo, batchCount), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // If batchCount == 0, dInfo can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGelsBatchedFn(handle, opN, M, N, nrhs, dAp, lda, dBp, ldb, &info, nullptr, 0), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_gels_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGelsBatchedFn = FORTRAN ? hipblasGelsBatched : hipblasGelsBatched; char transc = arg.transA; int N = arg.N; int M = arg.M; int nrhs = arg.K; int lda = arg.lda; int ldb = arg.ldb; int batchCount = arg.batch_count; if(is_complex && transc == 'T') transc = 'C'; else if(!is_complex && transc == 'C') transc = 'T'; hipblasOperation_t trans = char2hipblas_operation(transc); size_t A_size = size_t(lda) * N; size_t B_size = size_t(ldb) * nrhs; // Check to prevent memory allocation error if(M < 0 || N < 0 || nrhs < 0 || lda < M || ldb < M || ldb < N || batchCount < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batchCount == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batchCount); host_batch_vector hB(B_size, 1, batchCount); host_batch_vector hB_res(B_size, 1, batchCount); host_vector info_res(batchCount); host_vector info(batchCount); int info_input(-1); device_batch_vector dA(A_size, 1, batchCount); device_batch_vector dB(B_size, 1, batchCount); device_vector dInfo(batchCount); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA, hB, hX on CPU hipblas_init(hA, true); hipblas_init(hB); hB_res.copy_from(hB); // scale A to avoid singularities for(int b = 0; b < batchCount; b++) { for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } // Copy data from CPU to device CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGelsBatchedFn(handle, trans, M, N, nrhs, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, &info_input, dInfo, batchCount)); // copy output from device to CPU CHECK_HIP_ERROR(hB_res.transfer_from(dB)); CHECK_HIP_ERROR( hipMemcpy(info_res.data(), dInfo, sizeof(int) * batchCount, hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ int sizeW = std::max(1, std::min(M, N) + std::max(std::min(M, N), nrhs)); host_vector hW(sizeW); for(int b = 0; b < batchCount; b++) { info[b] = cblas_gels(transc, M, N, nrhs, hA[b], lda, hB[b], ldb, hW.data(), sizeW); } hipblas_error = norm_check_general('F', std::max(M, N), nrhs, ldb, hB, hB_res, batchCount); if(info_input != 0) hipblas_error += 1.0; for(int b = 0; b < batchCount; b++) { if(info[b] != info_res[b]) hipblas_error += 1.0; } if(arg.unit_check) { double eps = std::numeric_limits::epsilon(); double tolerance = N * eps * 100; int zero = 0; unit_check_error(hipblas_error, tolerance); unit_check_general(1, 1, 1, &zero, &info_input); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGelsBatchedFn(handle, trans, M, N, nrhs, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, &info_input, dInfo, batchCount)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGelsBatchedModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gels_strided_batched.hpp000066400000000000000000000536711434647641600254110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "gtest/gtest.h" #include #include #include #include #include "testing_common.hpp" using hipblasGelsStridedBatchedModel = ArgumentModel; inline void testname_gels_strided_batched(const Arguments& arg, std::string& name) { hipblasGelsStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gels_strided_batched_bad_arg(const Arguments& arg) { auto hipblasGelsStridedBatchedFn = arg.fortran ? hipblasGelsStridedBatched : hipblasGelsStridedBatched; hipblasLocalHandle handle(arg); const int M = 100; const int N = 101; const int nrhs = 10; const int lda = 102; const int ldb = 103; const int batchCount = 2; const hipblasOperation_t opN = HIPBLAS_OP_N; const hipblasOperation_t opBad = is_complex ? 
HIPBLAS_OP_T : HIPBLAS_OP_C; const hipblasStride strideA = size_t(lda) * N; const hipblasStride strideB = size_t(ldb) * nrhs; const size_t A_size = strideA * batchCount; const size_t B_size = strideB * batchCount; device_vector dA(A_size); device_vector dB(B_size); device_vector dInfo(batchCount); int info = 0; EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, M, N, nrhs, dA, lda, strideA, dB, ldb, strideB, nullptr, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opBad, M, N, nrhs, dA, lda, strideA, dB, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-1, info); EXPECT_HIPBLAS_STATUS( hipblasGelsStridedBatchedFn( handle, opN, -1, N, nrhs, dA, lda, strideA, dB, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS( hipblasGelsStridedBatchedFn( handle, opN, M, -1, nrhs, dA, lda, strideA, dB, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS( hipblasGelsStridedBatchedFn( handle, opN, M, N, -1, dA, lda, strideA, dB, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, M, N, nrhs, nullptr, lda, strideA, dB, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-5, info); EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, M, N, nrhs, dA, M - 1, strideA, dB, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-6, info); EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, M, N, nrhs, dA, lda, strideA, nullptr, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-8, info); // Explicit values to check for ldb < M and ldb < N EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, 100, 200, nrhs, dA, lda, strideA, dB, 199, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-9, info); EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, 200, 100, nrhs, dA, 201, strideA, dB, 199, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-9, info); EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, M, N, nrhs, dA, lda, strideA, dB, ldb, strideB, &info, nullptr, batchCount), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-12, info); EXPECT_HIPBLAS_STATUS( hipblasGelsStridedBatchedFn( handle, opN, M, N, nrhs, dA, lda, strideA, dB, ldb, strideB, &info, dInfo, -1), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-13, info); // If M == 0 || N == 0, A can be nullptr EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, 0, N, nrhs, nullptr, lda, strideA, dB, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, M, 0, nrhs, nullptr, lda, strideA, dB, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // if nrhs == 0, B can be nullptr EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, M, N, 0, dA, lda, strideA, nullptr, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // if M == 0 && N == 0, B can be nullptr EXPECT_HIPBLAS_STATUS(hipblasGelsStridedBatchedFn(handle, opN, 0, 0, nrhs, dA, lda, strideA, nullptr, ldb, strideB, &info, dInfo, batchCount), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // if batchCount == 0, dInfo can be 
nullptr EXPECT_HIPBLAS_STATUS( hipblasGelsStridedBatchedFn( handle, opN, M, N, nrhs, dA, lda, strideA, dB, ldb, strideB, &info, nullptr, 0), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_gels_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGelsStridedBatchedFn = FORTRAN ? hipblasGelsStridedBatched : hipblasGelsStridedBatched; char transc = arg.transA; int N = arg.N; int M = arg.M; int nrhs = arg.K; int lda = arg.lda; int ldb = arg.ldb; double strideScale = arg.stride_scale; int batchCount = arg.batch_count; if(is_complex && transc == 'T') transc = 'C'; else if(!is_complex && transc == 'C') transc = 'T'; // this makes logging incorrect as overriding arg hipblasOperation_t trans = char2hipblas_operation(transc); hipblasStride strideA = size_t(lda) * N * strideScale; hipblasStride strideB = size_t(ldb) * nrhs * strideScale; size_t A_size = strideA * batchCount; size_t B_size = strideB * batchCount; // Check to prevent memory allocation error if(M < 0 || N < 0 || nrhs < 0 || lda < M || ldb < M || ldb < N || batchCount < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batchCount == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hB_res(B_size); host_vector info_res(batchCount); host_vector info(batchCount); int info_input(-1); device_vector dA(A_size); device_vector dB(B_size); device_vector dInfo(batchCount); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA, hB, hX on CPU srand(1); hipblas_init(hA, true); hipblas_init(hB); for(int b = 0; b < batchCount; b++) { T* hAb = hA.data() + b * strideA; // scale A to avoid singularities for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { if(i == j) hAb[i + j * lda] += 400; else hAb[i + j * lda] -= 4; } } } hB_res = hB; // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, B_size * sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGelsStridedBatchedFn(handle, trans, M, N, nrhs, dA, lda, strideA, dB, ldb, strideB, &info_input, dInfo, batchCount)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hB_res, dB, B_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(info_res.data(), dInfo, sizeof(int) * batchCount, hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ int sizeW = std::max(1, std::min(M, N) + std::max(std::min(M, N), nrhs)); host_vector hW(sizeW); for(int b = 0; b < batchCount; b++) { info[b] = cblas_gels(transc, M, N, nrhs, hA.data() + b * strideA, lda, hB.data() + b * strideB, ldb, hW.data(), sizeW); } hipblas_error = norm_check_general( 'F', std::max(M, N), nrhs, ldb, strideB, hB.data(), hB_res.data(), batchCount); if(info_input != 0) hipblas_error += 1.0; for(int b = 0; b < batchCount; b++) { if(info[b] != info_res[b]) hipblas_error += 1.0; } if(arg.unit_check) { double eps = std::numeric_limits::epsilon(); double tolerance = N * eps * 100; int zero = 0; unit_check_error(hipblas_error, tolerance); unit_check_general(1, 1, 1, 
&zero, &info_input); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGelsStridedBatchedFn(handle, trans, M, N, nrhs, dA, lda, strideA, dB, ldb, strideB, &info_input, dInfo, batchCount)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGelsStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gemm.hpp000066400000000000000000000175521434647641600222120ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" #include /* ============================================================================================ */ using hipblasGemmModel = ArgumentModel; inline void testname_gemm(const Arguments& arg, std::string& name) { hipblasGemmModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemm(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemmFn = FORTRAN ? hipblasGemm : hipblasGemm; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); int A_row, A_col, B_row, B_col; if(transA == HIPBLAS_OP_N) { A_row = M; A_col = K; } else { A_row = K; A_col = M; } if(transB == HIPBLAS_OP_N) { B_row = K; B_col = N; } else { B_row = N; B_col = K; } size_t A_size = size_t(lda) * A_col; size_t B_size = size_t(ldb) * B_col; size_t C_size = size_t(ldc) * N; // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || K < 0 || lda < A_row || ldb < B_row || ldc < M) { return HIPBLAS_STATUS_INVALID_VALUE; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_copy(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); // Initial Data on CPU hipblas_init_matrix(hA, arg, A_row, A_col, lda, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hB, arg, B_row, B_col, ldb, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_matrix(hC_host, arg, M, N, ldc, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hz = hx: save a copy in hC_copy which will be output of CPU BLAS hC_copy = hC_host; hC_device = hC_host; // copy data from CPU to device, does not work for lda != A_row CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * lda * A_col, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * ldb * B_col, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * ldc * N, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); // library interface CHECK_HIPBLAS_ERROR(hipblasGemmFn( handle, transA, transB, M, N, K, &h_alpha, dA, lda, dB, ldb, &h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * ldc * N, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * ldc * N, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGemmFn( handle, transA, transB, M, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * ldc * N, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_gemm(transA, transB, M, N, K, h_alpha, hA.data(), lda, hB.data(), ldb, h_beta, hC_copy.data(), ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, ldc, hC_copy, hC_host); unit_check_general(M, N, ldc, hC_copy, hC_device); } if(arg.norm_check) { hipblas_error_host = std::abs(norm_check_general('F', M, N, ldc, hC_copy, hC_host)); hipblas_error_device = std::abs(norm_check_general('F', M, N, ldc, hC_copy, hC_device)); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); // gemm has better performance in host mode. In rocBLAS in device mode // we need to copy alpha and beta to the host. 
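    // Illustrative sketch only (not exercised by this test): a standalone caller
    // timing gemm would follow the same host-pointer-mode pattern, keeping alpha
    // and beta in host memory. Assuming float data and already-allocated device
    // buffers dA, dB, dC with leading dimensions lda, ldb, ldc:
    //
    //     hipblasHandle_t h;
    //     hipblasCreate(&h);
    //     hipblasSetPointerMode(h, HIPBLAS_POINTER_MODE_HOST);
    //     const float alpha = 1.0f, beta = 0.0f;
    //     hipblasSgemm(h, HIPBLAS_OP_N, HIPBLAS_OP_N, M, N, K,
    //                  &alpha, dA, lda, dB, ldb, &beta, dC, ldc);
    //     hipblasDestroy(h);
    //
    // In the loop below, the first arg.cold_iters iterations act as warm-up;
    // the timer is started only when iter == arg.cold_iters, so the warm-up
    // iterations are excluded from the reported time.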
CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGemmFn( handle, transA, transB, M, N, K, &h_alpha, dA, lda, dB, ldb, &h_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGemmModel{}.log_args(std::cout, arg, gpu_time_used, gemm_gflop_count(M, N, K), gemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gemm_batched.hpp000066400000000000000000000265641434647641600236670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "arg_check.h" #include "testing_common.hpp" #include /* ============================================================================================ */ using hipblasGemmBatchedModel = ArgumentModel; inline void testname_gemm_batched(const Arguments& arg, std::string& name) { hipblasGemmBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemm_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemmBatchedFn = FORTRAN ? 
hipblasGemmBatched : hipblasGemmBatched; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // bad arg checks if(batch_count < 0 || M < 0 || N < 0 || K < 0 || lda < 0 || ldb < 0 || ldc < 0) { hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; hipblasLocalHandle handle(arg); const T *dA_array[1], *dB_array[1]; T* dC1_array[1]; status = hipblasGemmBatchedFn(handle, transA, transB, M, N, K, &h_alpha, dA_array, lda, dB_array, ldb, &h_beta, dC1_array, ldc, batch_count); verify_hipblas_status_invalid_value( status, "ERROR: batch_count < 0 || M < 0 || N < 0 || K < 0 || lda < 0 || ldb < 0 || ldc < 0 "); return status; } int A_row, A_col, B_row, B_col; if(transA == HIPBLAS_OP_N) { A_row = M; A_col = K; } else { A_row = K; A_col = M; } if(transB == HIPBLAS_OP_N) { B_row = K; B_col = N; } else { B_row = N; B_col = K; } if(lda < A_row || ldb < B_row || ldc < M) { return HIPBLAS_STATUS_INVALID_VALUE; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); size_t A_size = size_t(lda) * A_col; size_t B_size = size_t(ldb) * B_col; size_t C_size = size_t(ldc) * N; // host arrays host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_copy(C_size, 1, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hB, arg, hipblas_client_alpha_sets_nan); hipblas_init_vector(hC_host, arg, hipblas_client_beta_sets_nan); hC_device.copy_from(hC_host); hC_copy.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { // calculate "golden" result on CPU for(int i = 0; i < batch_count; i++) { cblas_gemm(transA, transB, M, N, K, h_alpha, (T*)hA[i], lda, (T*)hB[i], ldb, h_beta, (T*)hC_copy[i], ldc); } // test hipBLAS batched gemm with alpha and beta pointers on device CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGemmBatchedFn(handle, transA, transB, M, N, K, d_alpha, (const T* const*)dA.ptr_on_device(), lda, (const T* const*)dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); // test hipBLAS batched gemm with alpha and beta pointers on host CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGemmBatchedFn(handle, transA, transB, M, N, K, &h_alpha, (const T* const*)dA.ptr_on_device(), lda, (const T* const*)dB.ptr_on_device(), ldb, &h_beta, dC.ptr_on_device(), ldc, batch_count)); 
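        // At this point dC holds the GPU result computed with host-side alpha/beta;
        // it is copied back into hC_host just below. Both GPU results (hC_host from
        // host pointer mode, hC_device from device pointer mode) are then checked
        // against the cblas_gemm reference computed into hC_copy, either
        // element-wise (unit_check) or via a matrix-norm ('F', Frobenius)
        // difference (norm_check).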
CHECK_HIP_ERROR(hC_host.transfer_from(dC)); if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, hC_copy, hC_host); unit_check_general(M, N, batch_count, ldc, hC_copy, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, hC_copy, hC_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldc, hC_copy, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); // gemm has better performance in host mode. In rocBLAS in device mode // we need to copy alpha and beta to the host. CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGemmBatchedFn(handle, transA, transB, M, N, K, &h_alpha, (const T* const*)dA.ptr_on_device(), lda, (const T* const*)dB.ptr_on_device(), ldb, &h_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGemmBatchedModel{}.log_args(std::cout, arg, gpu_time_used, gemm_gflop_count(M, N, K), gemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gemm_batched_ex.hpp000066400000000000000000000376741434647641600243670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include #include #include "hipblas_unique_ptr.hpp" #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGemmBatchedExModel = ArgumentModel; inline void testname_gemm_batched_ex(const Arguments& arg, std::string& name) { hipblasGemmBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemm_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemmBatchedExFn = FORTRAN ? 
hipblasGemmBatchedExFortran : hipblasGemmBatchedEx; hipblasGemmAlgo_t algo = HIPBLAS_GEMM_DEFAULT; uint32_t solution_index = 0; uint32_t flags = 0; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; hipblasDatatype_t a_type = arg.a_type; hipblasDatatype_t b_type = arg.b_type; hipblasDatatype_t c_type = arg.c_type; hipblasDatatype_t compute_type = arg.compute_type; Tex h_alpha_Tc = arg.get_alpha(); Tex h_beta_Tc = arg.get_beta(); int norm_check = arg.norm_check; int unit_check = arg.unit_check; int timing = arg.timing; int A_row = transA == HIPBLAS_OP_N ? M : K; int A_col = transA == HIPBLAS_OP_N ? K : M; int B_row = transB == HIPBLAS_OP_N ? K : N; int B_col = transB == HIPBLAS_OP_N ? N : K; // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || K < 0 || lda < A_row || ldb < B_row || ldc < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } const size_t size_A = static_cast(lda) * static_cast(A_col); const size_t size_B = static_cast(ldb) * static_cast(B_col); const size_t size_C = static_cast(ldc) * static_cast(N); device_batch_vector dA(size_A, 1, batch_count); device_batch_vector dB(size_B, 1, batch_count); device_batch_vector dC(size_C, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); host_batch_vector hA(size_A, 1, batch_count); host_batch_vector hB(size_B, 1, batch_count); host_batch_vector hC_host(size_C, 1, batch_count); host_batch_vector hC_device(size_C, 1, batch_count); host_batch_vector hC_gold(size_C, 1, batch_count); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hB, arg, hipblas_client_alpha_sets_nan); hipblas_init_vector(hC_host, arg, hipblas_client_beta_sets_nan); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); // Initial Data on CPU srand(1); for(int b = 0; b < batch_count; b++) { #ifdef __HIP_PLATFORM_NVCC__ CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); #else if(std::is_same{} && transA == HIPBLAS_OP_N && layout_pack_int8(handle)) { host_batch_vector hA_packed(size_A, 1, batch_count); hA_packed.copy_from(hA); for(int b = 0; b < batch_count; b++) hipblas_packInt8(hA_packed[b], hA[b], M, K, lda); CHECK_HIP_ERROR(dA.transfer_from(hA_packed)); } else { CHECK_HIP_ERROR(dA.transfer_from(hA)); } if(std::is_same{} && transB != HIPBLAS_OP_N && layout_pack_int8(handle)) { host_batch_vector hB_packed(size_B, 1, batch_count); hB_packed.copy_from(hB); for(int b = 0; b < batch_count; b++) hipblas_packInt8(hB_packed[b], hB[b], N, K, ldb); CHECK_HIP_ERROR(dB.transfer_from(hB_packed)); } else { CHECK_HIP_ERROR(dB.transfer_from(hB)); } #endif } CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha_Tc, sizeof(Tex), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta_Tc, sizeof(Tex), hipMemcpyHostToDevice)); if(unit_check || norm_check) { // hipBLAS CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGemmBatchedExFn(handle, transA, transB, M, N, K, &h_alpha_Tc, (const void**)(Ta**)dA.ptr_on_device(), a_type, lda, (const void**)(Tb**)dB.ptr_on_device(), 
b_type, ldb, &h_beta_Tc, (void**)(Tc**)dC.ptr_on_device(), c_type, ldc, batch_count, compute_type, algo)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGemmBatchedExFn(handle, transA, transB, M, N, K, d_alpha, (const void**)(Ta**)dA.ptr_on_device(), a_type, lda, (const void**)(Tb**)dB.ptr_on_device(), b_type, ldb, d_beta, (void**)(Tc**)dC.ptr_on_device(), c_type, ldc, batch_count, compute_type, algo)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); // CPU BLAS for(int b = 0; b < batch_count; b++) { cblas_gemm(transA, transB, M, N, K, h_alpha_Tc, hA[b], lda, hB[b], ldb, h_beta_Tc, hC_gold[b], ldc); } if(unit_check) { // check for float16/bfloat16 input if((getArchMajor() == 11) && ((std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}))) { const double tol = K * sum_error_tolerance_for_gfx11; near_check_general(M, N, batch_count, ldc, hC_gold, hC_host, tol); near_check_general(M, N, batch_count, ldc, hC_gold, hC_device, tol); } else { unit_check_general(M, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(M, N, batch_count, ldc, hC_gold, hC_device); } } if(norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldc, hC_gold, hC_device, batch_count); } } if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGemmBatchedExFn(handle, transA, transB, M, N, K, &h_alpha_Tc, (const void**)(Ta**)dA.ptr_on_device(), a_type, lda, (const void**)(Tb**)dB.ptr_on_device(), b_type, ldb, &h_beta_Tc, (void**)(Tc**)dC.ptr_on_device(), c_type, ldc, batch_count, compute_type, algo)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGemmBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, gemm_gflop_count(M, N, K), gemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_gemm_batched_ex(const Arguments& arg) { hipblasDatatype_t a_type = arg.a_type; hipblasDatatype_t b_type = arg.b_type; hipblasDatatype_t c_type = arg.c_type; hipblasDatatype_t compute_type = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(a_type == HIPBLAS_R_16F && b_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && compute_type == HIPBLAS_R_16F) { status = testing_gemm_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_16F && b_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && compute_type == HIPBLAS_R_32F) { status = testing_gemm_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_16B && b_type == HIPBLAS_R_16B && c_type == HIPBLAS_R_16B && c_type == HIPBLAS_R_16B && compute_type == HIPBLAS_R_32F) { status = testing_gemm_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_32F && b_type == HIPBLAS_R_32F && c_type == HIPBLAS_R_32F && c_type == HIPBLAS_R_32F && compute_type == HIPBLAS_R_32F) { status = testing_gemm_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_64F && b_type == HIPBLAS_R_64F && c_type == HIPBLAS_R_64F && c_type == HIPBLAS_R_64F && 
compute_type == HIPBLAS_R_64F) { status = testing_gemm_batched_ex_template(arg); } else if(a_type == HIPBLAS_C_32F && b_type == HIPBLAS_C_32F && c_type == HIPBLAS_C_32F && c_type == HIPBLAS_C_32F && compute_type == HIPBLAS_C_32F) { status = testing_gemm_batched_ex_template(arg); } else if(a_type == HIPBLAS_C_64F && b_type == HIPBLAS_C_64F && c_type == HIPBLAS_C_64F && c_type == HIPBLAS_C_64F && compute_type == HIPBLAS_C_64F) { status = testing_gemm_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_8I && b_type == HIPBLAS_R_8I && c_type == HIPBLAS_R_32I && c_type == HIPBLAS_R_32I && compute_type == HIPBLAS_R_32I) { status = testing_gemm_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_gemm_ex.hpp000066400000000000000000000361611434647641600227030ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "utility.h" #include #include #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGemmExModel = ArgumentModel; inline void testname_gemm_ex(const Arguments& arg, std::string& name) { hipblasGemmExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemm_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemmExFn = FORTRAN ? hipblasGemmExFortran : hipblasGemmEx; hipblasGemmAlgo_t algo = HIPBLAS_GEMM_DEFAULT; uint32_t solution_index = 0; uint32_t flags = 0; size_t* workspace_size = 0; void* workspace = 0; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; hipblasDatatype_t a_type = arg.a_type; hipblasDatatype_t b_type = arg.b_type; hipblasDatatype_t c_type = arg.c_type; hipblasDatatype_t compute_type = arg.compute_type; Tex h_alpha_Tc = arg.get_alpha(); Tex h_beta_Tc = arg.get_beta(); int norm_check = arg.norm_check; int unit_check = arg.unit_check; int timing = arg.timing; int A_row = transA == HIPBLAS_OP_N ? M : K; int A_col = transA == HIPBLAS_OP_N ? K : M; int B_row = transB == HIPBLAS_OP_N ? 
K : N; int B_col = transB == HIPBLAS_OP_N ? N : K; // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || K < 0 || lda < A_row || ldb < B_row || ldc < M) { return HIPBLAS_STATUS_INVALID_VALUE; } const size_t size_A = static_cast(lda) * static_cast(A_col); const size_t size_B = static_cast(ldb) * static_cast(B_col); const size_t size_C = static_cast(ldc) * static_cast(N); // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(size_A); host_vector hB(size_B); host_vector hC_host(size_C); host_vector hC_device(size_C); host_vector hC_gold(size_C); device_vector dA(size_A); device_vector dB(size_B); device_vector dC(size_C); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); for(auto int8Type : {HIPBLAS_INT8_DATATYPE_DEFAULT, HIPBLAS_INT8_DATATYPE_INT8, HIPBLAS_INT8_DATATYPE_PACK_INT8x4}) { // only need to test multiple int8Type for int8_t, for other datatypes break if(!(std::is_same{}) && HIPBLAS_INT8_DATATYPE_DEFAULT != int8Type) break; hipblasSetInt8Datatype(handle, int8Type); // Initial Data on CPU hipblas_init_matrix(hA, arg, A_row, A_col, lda, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hB, arg, B_row, B_col, ldb, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_matrix(hC_host, arg, M, N, ldc, 0, 1, hipblas_client_beta_sets_nan); hC_gold = hC_device = hC_host; // copy data from CPU to device // CUDA doesn't do packing #ifdef __HIP_PLATFORM_NVCC__ if(HIPBLAS_INT8_DATATYPE_DEFAULT != int8Type) break; CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(Ta) * size_A, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(Tb) * size_B, hipMemcpyHostToDevice)); #else if(std::is_same{} && transA == HIPBLAS_OP_N && layout_pack_int8(handle)) { host_vector hA_packed(hA); hipblas_packInt8(hA_packed, M, K, lda); CHECK_HIP_ERROR(hipMemcpy(dA, hA_packed, sizeof(Ta) * size_A, hipMemcpyHostToDevice)); } else { CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(Ta) * size_A, hipMemcpyHostToDevice)); } if(std::is_same{} && transB != HIPBLAS_OP_N && layout_pack_int8(handle)) { host_vector hB_packed(hB); hipblas_packInt8(hB_packed, N, K, ldb); CHECK_HIP_ERROR(hipMemcpy(dB, hB_packed, sizeof(Tb) * size_B, hipMemcpyHostToDevice)); } else { CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(Tb) * size_B, hipMemcpyHostToDevice)); } #endif CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(Tc) * size_C, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha_Tc, sizeof(Tex), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta_Tc, sizeof(Tex), hipMemcpyHostToDevice)); if(unit_check || norm_check) { // hipBLAS CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGemmExFn(handle, transA, transB, M, N, K, &h_alpha_Tc, dA, a_type, lda, dB, b_type, ldb, &h_beta_Tc, dC, c_type, ldc, compute_type, algo)); CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(Tc) * size_C, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(Tc) * size_C, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGemmExFn(handle, transA, transB, M, N, K, d_alpha, dA, a_type, lda, dB, b_type, ldb, d_beta, dC, c_type, ldc, compute_type, algo)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(Tc) * size_C, hipMemcpyDeviceToHost)); // reference BLAS cblas_gemm(transA, transB, M, N, K, h_alpha_Tc, hA.data(), lda, 
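// --- Illustrative sketch (not from the original hipBLAS test source) ---
// The int8 branch above repacks A/B on the host before the copy because the rocBLAS
// backend may expect int8 data in a packed "int8x4" layout (four consecutive values
// along the K dimension stored contiguously). The exact layout is owned by
// hipblas_packInt8 in the test harness; the helper below is only an assumed
// illustration of interleaving four K-columns of a column-major matrix
// (A and the result hold lda*K elements, lda >= rows, K a multiple of 4).
#include <cstdint>
#include <vector>

inline std::vector<int8_t>
    pack_int8x4_colmajor(const std::vector<int8_t>& A, int rows, int K, int lda)
{
    std::vector<int8_t> packed(A.size());
    for(int k0 = 0; k0 < K; k0 += 4)   // process K in groups of 4 columns
        for(int r = 0; r < rows; ++r)
            for(int i = 0; i < 4; ++i) // interleave the 4 columns row by row
                packed[size_t(k0) * lda + size_t(r) * 4 + i] = A[size_t(k0 + i) * lda + r];
    return packed;
}
// --- end sketch ---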
hB.data(), ldb, h_beta_Tc, hC_gold.data(), ldc); if(unit_check) { // check for float16/bfloat16 input if((getArchMajor() == 11) && ((std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}))) { const double tol = K * sum_error_tolerance_for_gfx11; near_check_general(M, N, ldc, hC_gold.data(), hC_host.data(), tol); near_check_general(M, N, ldc, hC_gold.data(), hC_device.data(), tol); } else { unit_check_general(M, N, ldc, hC_gold, hC_host); unit_check_general(M, N, ldc, hC_gold, hC_device); } } if(norm_check) { hipblas_error_host = std::abs(norm_check_general('F', M, N, ldc, hC_gold, hC_host)); hipblas_error_device = std::abs(norm_check_general('F', M, N, ldc, hC_gold, hC_device)); } } } if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGemmExFn(handle, transA, transB, M, N, K, &h_alpha_Tc, dA, a_type, lda, dB, b_type, ldb, &h_beta_Tc, dC, c_type, ldc, compute_type, algo)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGemmExModel{}.log_args(std::cout, arg, gpu_time_used, gemm_gflop_count(M, N, K), gemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_gemm_ex(const Arguments& arg) { hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; hipblasDatatype_t a_type = arg.a_type; hipblasDatatype_t b_type = arg.b_type; hipblasDatatype_t c_type = arg.c_type; hipblasDatatype_t compute_type = arg.compute_type; if(a_type == HIPBLAS_R_16F && b_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && compute_type == HIPBLAS_R_16F) { status = testing_gemm_ex_template(arg); } else if(a_type == HIPBLAS_R_16F && b_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && compute_type == HIPBLAS_R_32F) { status = testing_gemm_ex_template(arg); } else if(a_type == HIPBLAS_R_16B && b_type == HIPBLAS_R_16B && c_type == HIPBLAS_R_16B && c_type == HIPBLAS_R_16B && compute_type == HIPBLAS_R_32F) { status = testing_gemm_ex_template( arg); } else if(a_type == HIPBLAS_R_32F && b_type == HIPBLAS_R_32F && c_type == HIPBLAS_R_32F && c_type == HIPBLAS_R_32F && compute_type == HIPBLAS_R_32F) { status = testing_gemm_ex_template(arg); } else if(a_type == HIPBLAS_R_64F && b_type == HIPBLAS_R_64F && c_type == HIPBLAS_R_64F && c_type == HIPBLAS_R_64F && compute_type == HIPBLAS_R_64F) { status = testing_gemm_ex_template(arg); } else if(a_type == HIPBLAS_C_32F && b_type == HIPBLAS_C_32F && c_type == HIPBLAS_C_32F && c_type == HIPBLAS_C_32F && compute_type == HIPBLAS_C_32F) { status = testing_gemm_ex_template(arg); } else if(a_type == HIPBLAS_C_64F && b_type == HIPBLAS_C_64F && c_type == HIPBLAS_C_64F && c_type == HIPBLAS_C_64F && compute_type == HIPBLAS_C_64F) { status = testing_gemm_ex_template(arg); } else if(a_type == HIPBLAS_R_8I && b_type == HIPBLAS_R_8I && c_type == HIPBLAS_R_32I && c_type == HIPBLAS_R_32I && compute_type == HIPBLAS_R_32I) { status = testing_gemm_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_gemm_strided_batched.hpp000066400000000000000000000305341434647641600253750ustar00rootroot00000000000000/* 
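// --- Illustrative sketch (not from the original hipBLAS test source) ---
// testing_gemm_ex() above is a runtime-to-compile-time dispatcher: it inspects the four
// hipblasDatatype_t fields of Arguments and forwards to one explicit template
// instantiation (Ta/Tb/Tc/Tex), returning HIPBLAS_STATUS_NOT_SUPPORTED for any
// combination it does not list. The same shape, reduced to a toy enum (all names and
// types here are hypothetical):
#include <cstdio>

enum class Dt { f16, f32, f64 };

template <typename Tin, typename Tout, typename Tcompute>
int run_case() { std::puts("dispatched"); return 0; } // stand-in for the templated test body

inline int dispatch(Dt in, Dt out, Dt compute)
{
    if(in == Dt::f16 && out == Dt::f16 && compute == Dt::f32)
        return run_case<short, short, float>(); // 16-bit storage stand-in, f32 accumulate
    if(in == Dt::f32 && out == Dt::f32 && compute == Dt::f32)
        return run_case<float, float, float>();
    if(in == Dt::f64 && out == Dt::f64 && compute == Dt::f64)
        return run_case<double, double, double>();
    return -1; // analogous to HIPBLAS_STATUS_NOT_SUPPORTED
}
// --- end sketch ---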
************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include #include "hipblas_unique_ptr.hpp" #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGemmStridedBatchedModel = ArgumentModel; inline void testname_gemm_strided_batched(const Arguments& arg, std::string& name) { hipblasGemmStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemm_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemmStridedBatchedFn = FORTRAN ? hipblasGemmStridedBatched : hipblasGemmStridedBatched; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || K < 0 || lda < 0 || ldb < 0 || ldc < 0 || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } int A_row, A_col, B_row, B_col; if(transA == HIPBLAS_OP_N) { A_row = M; A_col = K; } else { A_row = K; A_col = M; } if(transB == HIPBLAS_OP_N) { B_row = K; B_col = N; } else { B_row = N; B_col = K; } hipblasStride stride_A = size_t(lda) * A_col * stride_scale; hipblasStride stride_B = size_t(ldb) * B_col * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t B_size = stride_B * batch_count; size_t C_size = stride_C * batch_count; // Naming: dX is in GPU (device) memory. 
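// --- Illustrative sketch (not from the original hipBLAS test source) ---
// In the strided-batched tests each matrix of a batch lives at a fixed element offset
// (the stride) from the previous one, so a single allocation of stride * batch_count
// elements holds the whole batch; the stride_scale factor (typically 1) lets the test
// pad the stride beyond the minimum lda * cols footprint. Restated on its own:
#include <cstddef>

struct StridedBatch
{
    size_t stride;      // elements between consecutive matrices
    size_t total_elems; // size of the single backing allocation
};

inline StridedBatch make_strided_batch(int ld, int cols, double stride_scale, int batch_count)
{
    StridedBatch b;
    b.stride      = static_cast<size_t>(static_cast<size_t>(ld) * cols * stride_scale);
    b.total_elems = b.stride * static_cast<size_t>(batch_count);
    return b;
}

// Matrix i of the batch then starts at base_ptr + i * stride, which is exactly how the
// reference loop below indexes hA.data() + stride_A * i.
// --- end sketch ---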
hK is in CPU (host) memory, plz follow this practice host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_copy(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); // Initial Data on CPU hipblas_init_matrix( hA, arg, A_row, A_col, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hB, arg, B_row, B_col, ldb, stride_B, batch_count, hipblas_client_alpha_sets_nan); hipblas_init_matrix( hC_host, arg, M, N, ldc, stride_C, batch_count, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hz = hx: save a copy in hC_copy which will be output of CPU BLAS hC_copy = hC_host; hC_device = hC_host; // copy data from CPU to device, does not work for lda != A_row CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { // host mode CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); // library interface CHECK_HIPBLAS_ERROR(hipblasGemmStridedBatchedFn(handle, transA, transB, M, N, K, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); // device mode CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasGemmStridedBatchedFn(handle, transA, transB, M, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int i = 0; i < batch_count; i++) { cblas_gemm(transA, transB, M, N, K, h_alpha, hA.data() + stride_A * i, lda, hB.data() + stride_B * i, ldb, h_beta, hC_copy.data() + stride_C * i, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, stride_C, hC_copy, hC_host); unit_check_general(M, N, batch_count, ldc, stride_C, hC_copy, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, stride_C, hC_copy, hC_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldc, stride_C, hC_copy, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); // gemm has better performance in host mode. In rocBLAS in device mode // we need to copy alpha and beta to the host. 
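// --- Illustrative sketch (not from the original hipBLAS test source) ---
// The verification above runs the GEMM twice: with HIPBLAS_POINTER_MODE_HOST the library
// reads alpha/beta through host pointers at call time, and with
// HIPBLAS_POINTER_MODE_DEVICE it dereferences the device-resident copies written via
// hipMemcpy. Both outputs are compared against the same CPU reference, so either scalar
// path can be caught independently; the timing loop below then sticks to host mode,
// which the comment above notes is the faster path. The control flow, stripped of GPU
// specifics (all names hypothetical):
#include <cassert>
#include <vector>

inline std::vector<double> scale(const std::vector<double>& x, const double* alpha_ptr)
{
    std::vector<double> y(x.size());
    for(size_t i = 0; i < x.size(); ++i)
        y[i] = *alpha_ptr * x[i]; // the "library" always reads the scalar through a pointer
    return y;
}

inline void two_pointer_mode_passes()
{
    const std::vector<double> x{1, 2, 3}, reference{2, 4, 6};
    double h_alpha = 2.0;     // host-mode scalar
    double d_alpha = h_alpha; // stands in for the device-side copy
    assert(scale(x, &h_alpha) == reference); // pass 1: host pointer mode
    assert(scale(x, &d_alpha) == reference); // pass 2: device pointer mode
}
// --- end sketch ---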
CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGemmStridedBatchedFn(handle, transA, transB, M, N, K, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, &h_beta, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGemmStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, gemm_gflop_count(M, N, K), gemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gemm_strided_batched_ex.hpp000066400000000000000000000431501434647641600260670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include #include #include "hipblas_unique_ptr.hpp" #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGemmStridedBatchedExModel = ArgumentModel; // strides not logged inline void testname_gemm_strided_batched_ex(const Arguments& arg, std::string& name) { hipblasGemmStridedBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemm_strided_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemmStridedBatchedExFn = FORTRAN ? hipblasGemmStridedBatchedExFortran : hipblasGemmStridedBatchedExFortran; hipblasGemmAlgo_t algo = HIPBLAS_GEMM_DEFAULT; uint32_t solution_index = 0; uint32_t flags = 0; hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasOperation_t transB = char2hipblas_operation(arg.transB); int M = arg.M; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; hipblasDatatype_t a_type = arg.a_type; hipblasDatatype_t b_type = arg.b_type; hipblasDatatype_t c_type = arg.c_type; hipblasDatatype_t compute_type = arg.compute_type; int batch_count = arg.batch_count; int norm_check = arg.norm_check; int unit_check = arg.unit_check; int timing = arg.timing; Tex h_alpha_Tc = arg.get_alpha(); Tex h_beta_Tc = arg.get_beta(); int A_row = transA == HIPBLAS_OP_N ? 
M : K; int A_col = transA == HIPBLAS_OP_N ? K : M; int B_row = transB == HIPBLAS_OP_N ? K : N; int B_col = transB == HIPBLAS_OP_N ? N : K; // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || K < 0 || lda < A_row || ldb < B_row || ldc < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } const size_t stride_A = static_cast(lda) * static_cast(A_col); const size_t stride_B = static_cast(ldb) * static_cast(B_col); const size_t stride_C = static_cast(ldc) * static_cast(N); const size_t size_A = stride_A * batch_count; const size_t size_B = stride_B * batch_count; const size_t size_C = stride_C * batch_count; device_vector dA(size_A); device_vector dB(size_B); device_vector dC(size_C); device_vector d_alpha(1); device_vector d_beta(1); if(!dA || !dB || !dC || !d_alpha || !d_beta) { PRINT_IF_HIP_ERROR(hipErrorOutOfMemory); return HIPBLAS_STATUS_ALLOC_FAILED; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(size_A); host_vector hB(size_B); host_vector hC_host(size_C); host_vector hC_device(size_C); host_vector hC_gold(size_C); // Initial Data on CPU hipblas_init_matrix( hA, arg, A_row, A_col, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hB, arg, B_row, B_col, ldb, stride_B, batch_count, hipblas_client_alpha_sets_nan); hipblas_init_matrix( hC_host, arg, M, N, ldc, stride_C, batch_count, hipblas_client_beta_sets_nan); hC_gold = hC_device = hC_host; // copy data from CPU to device #ifdef __HIP_PLATFORM_NVCC__ CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(Ta) * size_A, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(Tb) * size_B, hipMemcpyHostToDevice)); #else if(std::is_same{} && transA == HIPBLAS_OP_N && layout_pack_int8(handle)) { host_vector hA_packed(hA); hipblas_packInt8(hA_packed, M, K, lda, batch_count, stride_A); CHECK_HIP_ERROR(hipMemcpy(dA, hA_packed, sizeof(Ta) * size_A, hipMemcpyHostToDevice)); } else { CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(Ta) * size_A, hipMemcpyHostToDevice)); } if(std::is_same{} && transB != HIPBLAS_OP_N && layout_pack_int8(handle)) { host_vector hB_packed(hB); hipblas_packInt8(hB_packed, N, K, ldb, batch_count, stride_B); CHECK_HIP_ERROR(hipMemcpy(dB, hB_packed, sizeof(Tb) * size_B, hipMemcpyHostToDevice)); } else { CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(Tb) * size_B, hipMemcpyHostToDevice)); } #endif CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(Tc) * size_C, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha_Tc, sizeof(Tex), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta_Tc, sizeof(Tex), hipMemcpyHostToDevice)); if(unit_check || norm_check) { // hipBLAS CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGemmStridedBatchedExFn(handle, transA, transB, M, N, K, &h_alpha_Tc, dA, a_type, lda, stride_A, dB, b_type, ldb, stride_B, &h_beta_Tc, dC, c_type, ldc, stride_C, batch_count, compute_type, algo)); CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(Tc) * size_C, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(Tc) * size_C, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGemmStridedBatchedExFn(handle, transA, transB, M, N, K, d_alpha, dA, a_type, lda, stride_A, dB, b_type, ldb, stride_B, d_beta, dC, c_type, ldc, stride_C, batch_count, compute_type, 
algo)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(Tc) * size_C, hipMemcpyDeviceToHost)); // CPU BLAS for(int b = 0; b < batch_count; b++) { cblas_gemm(transA, transB, M, N, K, h_alpha_Tc, hA.data() + b * stride_A, lda, hB.data() + b * stride_B, ldb, h_beta_Tc, hC_gold.data() + b * stride_C, ldc); } if(unit_check) { // check for float16/bfloat16 input if((getArchMajor() == 11) && ((std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}))) { const double tol = K * sum_error_tolerance_for_gfx11; near_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_host, tol); near_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_device, tol); } else { unit_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_device); } } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, stride_C, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldc, stride_C, hC_gold, hC_device, batch_count); } } if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGemmStridedBatchedExFn(handle, transA, transB, M, N, K, &h_alpha_Tc, dA, a_type, lda, stride_A, dB, b_type, ldb, stride_B, &h_beta_Tc, dC, c_type, ldc, stride_C, batch_count, compute_type, algo)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGemmStridedBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, gemm_gflop_count(M, N, K), gemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_gemm_strided_batched_ex(const Arguments& arg) { hipblasDatatype_t a_type = arg.a_type; hipblasDatatype_t b_type = arg.b_type; hipblasDatatype_t c_type = arg.c_type; hipblasDatatype_t compute_type = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(a_type == HIPBLAS_R_16F && b_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && compute_type == HIPBLAS_R_16F) { status = testing_gemm_strided_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_16F && b_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && c_type == HIPBLAS_R_16F && compute_type == HIPBLAS_R_32F) { status = testing_gemm_strided_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_16B && b_type == HIPBLAS_R_16B && c_type == HIPBLAS_R_16B && c_type == HIPBLAS_R_16B && compute_type == HIPBLAS_R_32F) { status = testing_gemm_strided_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_32F && b_type == HIPBLAS_R_32F && c_type == HIPBLAS_R_32F && c_type == HIPBLAS_R_32F && compute_type == HIPBLAS_R_32F) { status = testing_gemm_strided_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_64F && b_type == HIPBLAS_R_64F && c_type == HIPBLAS_R_64F && c_type == HIPBLAS_R_64F && compute_type == HIPBLAS_R_64F) { status = testing_gemm_strided_batched_ex_template(arg); } else if(a_type == HIPBLAS_C_32F && b_type == HIPBLAS_C_32F && c_type == HIPBLAS_C_32F && c_type == HIPBLAS_C_32F && compute_type == HIPBLAS_C_32F) { status = testing_gemm_strided_batched_ex_template(arg); } else if(a_type == HIPBLAS_C_64F && b_type == HIPBLAS_C_64F && c_type == HIPBLAS_C_64F && c_type == HIPBLAS_C_64F && 
compute_type == HIPBLAS_C_64F) { status = testing_gemm_strided_batched_ex_template(arg); } else if(a_type == HIPBLAS_R_8I && b_type == HIPBLAS_R_8I && c_type == HIPBLAS_R_32I && c_type == HIPBLAS_R_32I && compute_type == HIPBLAS_R_32I) { status = testing_gemm_strided_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_gemv.hpp000066400000000000000000000172231434647641600222160ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGemvModel = ArgumentModel; inline void testname_gemv(const Arguments& arg, std::string& name) { hipblasGemvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemvFn = FORTRAN ? hipblasGemv : hipblasGemv; int M = arg.M; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; size_t A_size = size_t(lda) * N; size_t X_size, dim_x; size_t Y_size, dim_y; hipblasOperation_t transA = char2hipblas_operation(arg.transA); if(transA == HIPBLAS_OP_N) { dim_x = N; dim_y = M; } else { dim_x = M; dim_y = N; } hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || lda < M || lda < 1 || !incx || !incy; if(invalid_size || !M || !N) { // Only rocBLAS conforms to expected behaviour so commenting out /* hipblasStatus_t actual = hipblasGemvFn( handle, transA, M, N, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; */ return invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; X_size = dim_x * abs_incx; Y_size = dim_y * abs_incy; // Naming: dK is in GPU (device) memory. 
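// --- Illustrative sketch (not from the original hipBLAS test source) ---
// For GEMV the vector lengths depend on the transpose flag: y := alpha*op(A)*x + beta*y
// needs x of length N and y of length M when op(A) = A, and the transposed cases swap
// the two. With an increment of incx, a vector of logical length n occupies
// n * |incx| elements, which is how X_size/Y_size above are computed. The helper below
// (hypothetical) restates that mapping:
#include <cstdlib>

struct GemvVecSizes
{
    int    dim_x, dim_y;     // logical lengths of x and y
    size_t x_elems, y_elems; // allocation sizes in elements
};

inline GemvVecSizes gemv_vector_sizes(bool transA_is_N, int M, int N, int incx, int incy)
{
    GemvVecSizes s;
    s.dim_x   = transA_is_N ? N : M;
    s.dim_y   = transA_is_N ? M : N;
    s.x_elems = size_t(s.dim_x) * std::abs(incx);
    s.y_elems = size_t(s.dim_y) * std::abs(incy);
    return s;
}
// --- end sketch ---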
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix(hA, arg, lda, N, lda, 0, 1, hipblas_client_alpha_sets_nan, true, false); hipblas_init_vector(hx, arg, dim_x, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, dim_y, abs_incy, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGemvFn( handle, transA, M, N, (T*)&h_alpha, dA, lda, dx, incx, (T*)&h_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasGemvFn(handle, transA, M, N, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_gemv( transA, M, N, h_alpha, hA.data(), lda, hx.data(), incx, h_beta, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, dim_y, abs_incy, hy_cpu, hy_host); unit_check_general(1, dim_y, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, dim_y, abs_incy, hy_cpu, hy_host); hipblas_error_device = norm_check_general('F', 1, dim_y, abs_incy, hy_cpu, hy_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR( hipblasGemvFn(handle, transA, M, N, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGemvModel{}.log_args(std::cout, arg, gpu_time_used, gemv_gflop_count(transA, M, N), gemv_gbyte_count(transA, M, N), 
hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gemv_batched.hpp000066400000000000000000000240201434647641600236610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGemvBatchedModel = ArgumentModel; inline void testname_gemv_batched(const Arguments& arg, std::string& name) { hipblasGemvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemvBatchedFn = FORTRAN ? hipblasGemvBatched : hipblasGemvBatched; int M = arg.M; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; size_t A_size = size_t(lda) * N; size_t dim_x; size_t dim_y; int batch_count = arg.batch_count; hipblasOperation_t transA = char2hipblas_operation(arg.transA); if(transA == HIPBLAS_OP_N) { dim_x = N; dim_y = M; } else { dim_x = M; dim_y = N; } hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || lda < M || lda < 1 || !incx || !incy || batch_count < 0; if(invalid_size || !M || !N || !batch_count) { hipblasStatus_t actual = hipblasGemvBatchedFn(handle, transA, M, N, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } int abs_incy = incy >= 0 ? 
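// --- Illustrative sketch (not from the original hipBLAS test source) ---
// Unlike the strided-batched form, the *_batched APIs exercised below take an array of
// per-matrix pointers (T* const*), which is why the test passes dA.ptr_on_device():
// the pointer array itself must live in device memory. Building such an array on the
// host side looks roughly like this (hypothetical helper; the real device_batch_vector
// also uploads the array to the GPU):
#include <vector>

template <typename T>
std::vector<T*> collect_batch_pointers(std::vector<std::vector<T>>& matrices)
{
    std::vector<T*> ptrs;
    ptrs.reserve(matrices.size());
    for(auto& m : matrices)
        ptrs.push_back(m.data()); // one pointer per batch member
    return ptrs;                  // this array would then be copied to device memory
}
// --- end sketch ---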
incy : -incy; double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(dim_x, incx, batch_count); host_batch_vector hy(dim_y, incy, batch_count); host_batch_vector hy_cpu(dim_y, incy, batch_count); host_batch_vector hy_host(dim_y, incy, batch_count); host_batch_vector hy_device(dim_y, incy, batch_count); // device pointers device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(dim_x, incx, batch_count); device_batch_vector dy(dim_y, incy, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_beta_sets_nan); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGemvBatchedFn(handle, transA, M, N, (T*)&h_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, (T*)&h_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGemvBatchedFn(handle, transA, M, N, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_gemv(transA, M, N, h_alpha, hA[b], lda, hx[b], incx, h_beta, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, dim_y, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, dim_y, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, dim_y, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, dim_y, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(dy.transfer_from(hy)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasGemvBatchedFn(handle, transA, M, N, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
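// --- Illustrative sketch (not from the original hipBLAS test source) ---
// The timing loop just above runs arg.cold_iters warm-up calls, starts the clock at the
// first "hot" iteration, and the harness then reports throughput by dividing the
// accumulated flop count by the measured wall time. Using the usual operation counts
// (about 2*M*N flops for GEMV, 2*M*N*K for GEMM), the reduction to GFLOP/s is:
#include <cstddef>

inline double gflops_from_timing(double total_flops, // e.g. 2.0*M*N*hot_iters for GEMV
                                 double elapsed_us)  // time covering the hot iterations
{
    // flops / (us * 1e-6 s/us) / 1e9  ==  (flops / us) * 1e-3  GFLOP/s
    return elapsed_us > 0.0 ? (total_flops / elapsed_us) * 1e-3 : 0.0;
}
// --- end sketch ---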
hipblasGemvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, gemv_gflop_count(transA, M, N), gemv_gbyte_count(transA, M, N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_gemv_strided_batched.hpp000066400000000000000000000315551434647641600254120ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGemvStridedBatchedModel = ArgumentModel; inline void testname_gemv_strided_batched(const Arguments& arg, std::string& name) { hipblasGemvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_gemv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGemvStridedBatchedFn = FORTRAN ? hipblasGemvStridedBatched : hipblasGemvStridedBatched; int M = arg.M; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride stride_A = lda * N * stride_scale; hipblasStride stride_x; hipblasStride stride_y; size_t A_size = stride_A * batch_count; size_t X_size, dim_x; size_t Y_size, dim_y; hipblasOperation_t transA = char2hipblas_operation(arg.transA); if(transA == HIPBLAS_OP_N) { dim_x = N; dim_y = M; } else { dim_x = M; dim_y = N; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; stride_x = dim_x * abs_incx * stride_scale; stride_y = dim_y * abs_incy * stride_scale; X_size = stride_x * batch_count; Y_size = stride_y * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || lda < M || lda < 1 || !incx || !incy || batch_count < 0; if(invalid_size || !M || !N || !batch_count) { hipblasStatus_t actual = hipblasGemvStridedBatchedFn(handle, transA, M, N, nullptr, nullptr, lda, stride_A, nullptr, incx, stride_x, nullptr, nullptr, incy, stride_y, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix( hA, arg, M, N, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, dim_x, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector( hy, arg, dim_y, abs_incy, stride_y, batch_count, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hy_cpu = hy: save a copy in hy_cpu which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGemvStridedBatchedFn(handle, transA, M, N, (T*)&h_alpha, dA, lda, stride_A, dx, incx, stride_x, (T*)&h_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGemvStridedBatchedFn(handle, transA, M, N, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_gemv(transA, M, N, h_alpha, hA.data() + b * stride_A, lda, hx.data() + b * stride_x, incx, h_beta, hy_cpu.data() + b * stride_y, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, dim_y, batch_count, abs_incy, stride_y, hy_cpu, hy_host); unit_check_general(1, dim_y, batch_count, abs_incy, stride_y, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, dim_y, abs_incy, stride_y, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, dim_y, abs_incy, stride_y, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; 
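// --- Illustrative sketch (not from the original hipBLAS test source) ---
// norm_check_general('F', ...) above condenses the whole output into one scalar by
// comparing the GPU result against the CPU reference in the Frobenius norm; the exact
// normalization is owned by the harness, but the measure is of the form
// ||ref - out||_F / ||ref||_F, e.g.:
#include <cmath>
#include <vector>

inline double frobenius_relative_error(const std::vector<double>& ref,
                                       const std::vector<double>& out)
{
    double diff2 = 0.0, ref2 = 0.0;
    for(size_t i = 0; i < ref.size(); ++i)
    {
        const double d = ref[i] - out[i];
        diff2 += d * d;
        ref2 += ref[i] * ref[i];
    }
    return ref2 > 0.0 ? std::sqrt(diff2) / std::sqrt(ref2) : std::sqrt(diff2);
}
// --- end sketch ---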
CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasGemvStridedBatchedFn(handle, transA, M, N, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGemvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, gemv_gflop_count(transA, M, N), gemv_gbyte_count(transA, M, N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_geqrf.hpp000066400000000000000000000173751434647641600223740ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "gtest/gtest.h" #include #include #include #include #include "testing_common.hpp" using hipblasGeqrfModel = ArgumentModel; inline void testname_geqrf(const Arguments& arg, std::string& name) { hipblasGeqrfModel{}.test_name(arg, name); } template inline hipblasStatus_t setup_geqrf_testing( host_vector& hA, device_vector& dA, device_vector& dIpiv, int M, int N, int lda) { size_t A_size = size_t(lda) * N; int K = std::min(M, N); // Initial hA on CPU srand(1); hipblas_init(hA, M, N, lda); // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[i + j * lda] += 400; else hA[i + j * lda] -= 4; } } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemset(dIpiv, 0, K * sizeof(T))); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_geqrf_bad_arg(const Arguments& arg) { auto hipblasGeqrfFn = arg.fortran ? 
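// --- Illustrative sketch (not from the original hipBLAS test source) ---
// setup_geqrf_testing above nudges the random matrix toward diagonal dominance
// (+400 on the diagonal, -4 elsewhere) so the factorization stays well conditioned and
// the CPU/GPU results can be compared with a tight tolerance. The same adjustment in
// isolation, for a column-major M x N block with leading dimension lda:
#include <vector>

template <typename T>
void make_diagonally_dominant(std::vector<T>& A, int M, int N, int lda)
{
    for(int j = 0; j < N; ++j)
        for(int i = 0; i < M; ++i)
            A[size_t(j) * lda + i] += (i == j) ? T(400) : T(-4); // boost the diagonal
}
// --- end sketch ---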
hipblasGeqrf : hipblasGeqrf; hipblasLocalHandle handle(arg); const int M = 100; const int N = 101; const int lda = 102; const size_t A_size = size_t(N) * lda; const int K = std::min(M, N); host_vector hA(A_size); device_vector dA(A_size); device_vector dIpiv(K); int info = 0; EXPECT_HIPBLAS_STATUS(setup_geqrf_testing(hA, dA, dIpiv, M, N, lda), HIPBLAS_STATUS_SUCCESS); EXPECT_HIPBLAS_STATUS(hipblasGeqrfFn(handle, M, N, dA, lda, dIpiv, nullptr), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS(hipblasGeqrfFn(handle, -1, N, dA, lda, dIpiv, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-1, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfFn(handle, M, -1, dA, lda, dIpiv, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfFn(handle, M, N, nullptr, lda, dIpiv, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfFn(handle, M, N, dA, M - 1, dIpiv, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfFn(handle, M, N, dA, lda, nullptr, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-5, info); // If M == 0 || N == 0, A and ipiv can be nullptr EXPECT_HIPBLAS_STATUS(hipblasGeqrfFn(handle, 0, N, nullptr, lda, nullptr, &info), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfFn(handle, M, 0, nullptr, lda, nullptr, &info), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_geqrf(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGeqrfFn = FORTRAN ? hipblasGeqrf : hipblasGeqrf; int M = arg.M; int N = arg.N; int K = std::min(M, N); int lda = arg.lda; size_t A_size = size_t(lda) * N; int Ipiv_size = K; int info; hipblasLocalHandle handle(arg); // Check to prevent memory allocation error bool invalid_size = M < 0 || N < 0 || lda < std::max(1, M); if(invalid_size || !M || !N) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA1(A_size); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); device_vector dA(A_size); device_vector dIpiv(Ipiv_size); double gpu_time_used, hipblas_error; EXPECT_HIPBLAS_STATUS(setup_geqrf_testing(hA, dA, dIpiv, M, N, lda), HIPBLAS_STATUS_SUCCESS); /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGeqrfFn(handle, M, N, dA, lda, dIpiv, &info)); // Copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hA1, dA, A_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hIpiv1, dIpiv, Ipiv_size * sizeof(T), hipMemcpyDeviceToHost)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU LAPACK =================================================================== */ // Workspace query host_vector work(1); cblas_geqrf(M, N, hA.data(), lda, hIpiv.data(), work.data(), -1); int lwork = type2int(work[0]); // Perform factorization work = host_vector(lwork); cblas_geqrf(M, N, hA.data(), lda, hIpiv.data(), work.data(), lwork); double e1 = norm_check_general('F', M, N, lda, hA, hA1); double e2 = norm_check_general('F', K, 1, K, hIpiv, hIpiv1); hipblas_error = e1 + e2; if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(e1, tolerance); unit_check_error(e2, tolerance); int zero = 0; unit_check_general(1, 1, 1, &zero, &info); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGeqrfFn(handle, M, N, dA, lda, dIpiv, &info)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGeqrfModel{}.log_args(std::cout, arg, gpu_time_used, geqrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_geqrf_batched.hpp000066400000000000000000000227411434647641600240370ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
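// --- Illustrative sketch (not from the original hipBLAS test source) ---
// The reference path above uses the standard LAPACK workspace-query protocol: call the
// routine once with lwork = -1, which performs no factorization and instead writes the
// optimal workspace size into work[0]; then allocate that much and call again. The
// pattern, independent of any particular routine (hypothetical callable):
#include <vector>

template <typename Factorize> // callable: (double* work, int lwork) -> void
std::vector<double> run_with_workspace_query(Factorize factorize)
{
    std::vector<double> work(1);
    factorize(work.data(), -1);                  // query pass: no work done
    const int lwork = static_cast<int>(work[0]); // optimal size reported in work[0]
    work.assign(lwork > 0 ? lwork : 1, 0.0);
    factorize(work.data(), lwork);               // real pass with the sized buffer
    return work;
}
// --- end sketch ---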
* * ************************************************************************ */ #include "gtest/gtest.h" #include #include #include #include #include "testing_common.hpp" using hipblasGeqrfBatchedModel = ArgumentModel; inline void testname_geqrf_batched(const Arguments& arg, std::string& name) { hipblasGeqrfBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t setup_geqrf_batched_testing(host_batch_vector& hA, host_batch_vector& hIpiv, device_batch_vector& dA, device_batch_vector& dIpiv, int M, int N, int lda, int batch_count) { // Initial hA on CPU hipblas_init(hA, true); srand(1); for(int b = 0; b < batch_count; b++) { // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dIpiv.transfer_from(hIpiv)); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_geqrf_batched_bad_arg(const Arguments& arg) { auto hipblasGeqrfBatchedFn = arg.fortran ? hipblasGeqrfBatched : hipblasGeqrfBatched; hipblasLocalHandle handle(arg); const int M = 100; const int N = 101; const int lda = 102; const int batch_count = 2; const size_t A_size = size_t(N) * lda; const int K = std::min(M, N); host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hIpiv(K, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dIpiv(K, 1, batch_count); int info = 0; T* const* dAp = dA.ptr_on_device(); T* const* dIpivp = dIpiv.ptr_on_device(); EXPECT_HIPBLAS_STATUS(setup_geqrf_batched_testing(hA, hIpiv, dA, dIpiv, M, N, lda, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_HIPBLAS_STATUS( hipblasGeqrfBatchedFn(handle, M, N, dAp, lda, dIpivp, nullptr, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS( hipblasGeqrfBatchedFn(handle, -1, N, dAp, lda, dIpivp, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-1, info); EXPECT_HIPBLAS_STATUS( hipblasGeqrfBatchedFn(handle, M, -1, dAp, lda, dIpivp, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS( hipblasGeqrfBatchedFn(handle, M, N, dAp, M - 1, dIpivp, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfBatchedFn(handle, M, N, dAp, lda, dIpivp, &info, -1), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-7, info); // If M == 0 || N == 0, A and ipiv can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGeqrfBatchedFn(handle, 0, N, nullptr, lda, nullptr, &info, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); EXPECT_HIPBLAS_STATUS( hipblasGeqrfBatchedFn(handle, M, 0, nullptr, lda, nullptr, &info, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // can't make any assumptions about ptrs when batch_count < 0, this is handled by rocSOLVER // cuBLAS beckend doesn't check for nullptrs for A and ipiv #ifndef __HIP_PLATFORM_NVCC__ EXPECT_HIPBLAS_STATUS( hipblasGeqrfBatchedFn(handle, M, N, nullptr, lda, dIpivp, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS( hipblasGeqrfBatchedFn(handle, M, N, dAp, lda, nullptr, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-5, info); #endif return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_geqrf_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGeqrfBatchedFn = FORTRAN ? 
hipblasGeqrfBatched : hipblasGeqrfBatched; int M = arg.M; int N = arg.N; int K = std::min(M, N); int lda = arg.lda; int batch_count = arg.batch_count; size_t A_size = size_t(lda) * N; int Ipiv_size = K; int info; hipblasLocalHandle handle(arg); // Check to prevent memory allocation error bool invalid_size = M < 0 || N < 0 || lda < std::max(1, M) || batch_count < 0; if(invalid_size || !M || !N || !batch_count) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA1(A_size, 1, batch_count); host_batch_vector hIpiv(Ipiv_size, 1, batch_count); host_batch_vector hIpiv1(Ipiv_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dIpiv(Ipiv_size, 1, batch_count); double gpu_time_used, hipblas_error; EXPECT_HIPBLAS_STATUS(setup_geqrf_batched_testing(hA, hIpiv, dA, dIpiv, M, N, lda, batch_count), HIPBLAS_STATUS_SUCCESS); /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGeqrfBatchedFn( handle, M, N, dA.ptr_on_device(), lda, dIpiv.ptr_on_device(), &info, batch_count)); CHECK_HIP_ERROR(hIpiv1.transfer_from(dIpiv)); CHECK_HIP_ERROR(hA1.transfer_from(dA)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU LAPACK =================================================================== */ // Workspace query host_vector work(1); cblas_geqrf(M, N, hA[0], lda, hIpiv[0], work.data(), -1); int lwork = type2int(work[0]); // Perform factorization work = host_vector(lwork); for(int b = 0; b < batch_count; b++) { cblas_geqrf(M, N, hA[b], lda, hIpiv[b], work.data(), N); } double e1 = norm_check_general('F', M, N, lda, hA, hA1, batch_count); double e2 = norm_check_general('F', Ipiv_size, 1, Ipiv_size, hIpiv, hIpiv1, batch_count); hipblas_error = e1 + e2; if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(e1, tolerance); unit_check_error(e2, tolerance); int zero = 0; unit_check_general(1, 1, 1, &zero, &info); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGeqrfBatchedFn( handle, M, N, dA.ptr_on_device(), lda, dIpiv.ptr_on_device(), &info, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGeqrfBatchedModel{}.log_args(std::cout, arg, gpu_time_used, geqrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_geqrf_strided_batched.hpp000066400000000000000000000263631434647641600255610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
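// The batched variants tested above take a host-side array of per-matrix device pointers rather
// than one contiguous matrix. The harness builds that array with device_batch_vector<T>::ptr_on_device(),
// roughly as sketched below. Container types, the templated hipblasGeqrfBatched<T, false> wrapper
// (the non-Fortran mapping this harness dispatches through), and the helper name run_geqrf_batched
// are taken from or modeled on this test code; treat the function as an outline of the call shape,
// not a standalone program:
template <typename T>
hipblasStatus_t run_geqrf_batched(hipblasHandle_t handle, int M, int N, int lda, int batch_count)
{
    const size_t           A_size = size_t(lda) * N;
    const int              K      = std::min(M, N);
    device_batch_vector<T> dA(A_size, 1, batch_count); // one lda-by-N matrix per batch index
    device_batch_vector<T> dTau(K, 1, batch_count);    // one length-K tau vector per matrix
    int                    info   = 0;

    // ptr_on_device() returns a device array of T*, one pointer per batch entry.
    return hipblasGeqrfBatched<T, false>(
        handle, M, N, dA.ptr_on_device(), lda, dTau.ptr_on_device(), &info, batch_count);
}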
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGeqrfStridedBatchedModel = ArgumentModel; inline void testname_geqrf_strided_batched(const Arguments& arg, std::string& name) { hipblasGeqrfStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t setup_geqrf_strided_batched_testing(host_vector& hA, device_vector& dA, device_vector& dIpiv, int M, int N, int lda, hipblasStride strideA, hipblasStride strideP, int batch_count) { size_t A_size = strideA * batch_count; size_t Ipiv_size = strideP * batch_count; // Initial hA on CPU srand(1); for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; hipblas_init(hAb, M, N, lda); // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hAb[i + j * lda] += 400; else hAb[i + j * lda] -= 4; } } } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemset(dIpiv, 0, Ipiv_size * sizeof(T))); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_geqrf_strided_batched_bad_arg(const Arguments& arg) { auto hipblasGeqrfStridedBatchedFn = arg.fortran ? 
hipblasGeqrfStridedBatched : hipblasGeqrfStridedBatched; hipblasLocalHandle handle(arg); const int M = 100; const int N = 101; const int K = std::min(M, N); const int lda = 102; const int batch_count = 2; hipblasStride strideA = size_t(lda) * N; hipblasStride strideP = K; size_t A_size = strideA * batch_count; size_t Ipiv_size = strideP * batch_count; host_vector hA(A_size); device_vector dA(A_size); device_vector dIpiv(Ipiv_size); int info = 0; EXPECT_HIPBLAS_STATUS(setup_geqrf_strided_batched_testing( hA, dA, dIpiv, M, N, lda, strideA, strideP, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_HIPBLAS_STATUS(hipblasGeqrfStridedBatchedFn( handle, M, N, dA, lda, strideA, dIpiv, strideP, nullptr, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS(hipblasGeqrfStridedBatchedFn( handle, -1, N, dA, lda, strideA, dIpiv, strideP, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-1, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfStridedBatchedFn( handle, M, -1, dA, lda, strideA, dIpiv, strideP, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS( hipblasGeqrfStridedBatchedFn( handle, M, N, nullptr, lda, strideA, dIpiv, strideP, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfStridedBatchedFn( handle, M, N, dA, M - 1, strideA, dIpiv, strideP, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS(hipblasGeqrfStridedBatchedFn( handle, M, N, dA, lda, strideA, nullptr, strideP, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-6, info); EXPECT_HIPBLAS_STATUS( hipblasGeqrfStridedBatchedFn(handle, M, N, dA, lda, strideA, dIpiv, strideP, &info, -1), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-9, info); // If M == 0 || N == 0, A and ipiv can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGeqrfStridedBatchedFn( handle, 0, N, nullptr, lda, strideA, nullptr, strideP, &info, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); EXPECT_HIPBLAS_STATUS( hipblasGeqrfStridedBatchedFn( handle, M, 0, nullptr, lda, strideA, nullptr, strideP, &info, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // can't make any assumptions about ptrs when batch_count < 0, this is handled by rocSOLVER return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_geqrf_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGeqrfStridedBatchedFn = FORTRAN ? hipblasGeqrfStridedBatched : hipblasGeqrfStridedBatched; int M = arg.M; int N = arg.N; int K = std::min(M, N); int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride strideA = lda * N * stride_scale; hipblasStride strideP = K * stride_scale; int A_size = strideA * batch_count; int Ipiv_size = strideP * batch_count; int info; hipblasLocalHandle handle(arg); // Check to prevent memory allocation error bool invalid_size = M < 0 || N < 0 || lda < std::max(1, M) || batch_count < 0; if(invalid_size || !M || !N || !batch_count) { // including pointers so can test other params device_vector dA(1); device_vector dIpiv(1); hipblasStatus_t status = hipblasGeqrfStridedBatchedFn( handle, M, N, dA, lda, strideA, dIpiv, strideP, &info, batch_count); EXPECT_HIPBLAS_STATUS( status, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); int expected_info = 0; if(M < 0) expected_info = -1; else if(N < 0) expected_info = -2; else if(lda < std::max(1, M)) expected_info = -4; else if(batch_count < 0) expected_info = -9; unit_check_general(1, 1, 1, &expected_info, &info); return status; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hA1(A_size); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); device_vector dA(A_size); device_vector dIpiv(Ipiv_size); double gpu_time_used, hipblas_error; EXPECT_HIPBLAS_STATUS(setup_geqrf_strided_batched_testing( hA, dA, dIpiv, M, N, lda, strideA, strideP, batch_count), HIPBLAS_STATUS_SUCCESS); /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGeqrfStridedBatchedFn( handle, M, N, dA, lda, strideA, dIpiv, strideP, &info, batch_count)); // Copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hA1.data(), dA, A_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hIpiv1.data(), dIpiv, Ipiv_size * sizeof(T), hipMemcpyDeviceToHost)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU LAPACK =================================================================== */ // Workspace query host_vector work(1); cblas_geqrf(M, N, hA.data(), lda, hIpiv.data(), work.data(), -1); int lwork = type2int(work[0]); // Perform factorization work = host_vector(lwork); for(int b = 0; b < batch_count; b++) { cblas_geqrf( M, N, hA.data() + b * strideA, lda, hIpiv.data() + b * strideP, work.data(), N); } double e1 = norm_check_general('F', M, N, lda, strideA, hA, hA1, batch_count); double e2 = norm_check_general('F', K, 1, K, strideP, hIpiv, hIpiv1, batch_count); hipblas_error = e1 + e2; if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(e1, tolerance); unit_check_error(e2, tolerance); int zero = 0; unit_check_general(1, 1, 1, &zero, &info); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGeqrfStridedBatchedFn( handle, M, N, dA, lda, strideA, dIpiv, strideP, &info, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGeqrfStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, geqrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_ger.hpp000066400000000000000000000160511434647641600220330ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
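// The quick-return block above encodes the LAPACK-style convention that a negative info value
// names the first invalid argument, counting from 1 after the handle (so M is 1, N is 2, A is 3,
// lda is 4, and batch_count is 9 for the strided-batched geqrf signature). A condensed restatement
// of the mapping the test expects; the helper name is hypothetical and exists only to spell the
// convention out:
inline int expected_geqrf_strided_batched_info(int M, int N, int lda, int batch_count)
{
    if(M < 0)
        return -1; // M is the 1st argument after the handle
    if(N < 0)
        return -2; // N is the 2nd
    if(lda < std::max(1, M))
        return -4; // lda is the 4th (A is the 3rd)
    if(batch_count < 0)
        return -9; // batch_count is the 9th
    return 0;      // all sizes valid
}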
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGerModel = ArgumentModel; inline void testname_ger(const Arguments& arg, std::string& name) { hipblasGerModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_ger(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGerFn = FORTRAN ? (CONJ ? hipblasGer : hipblasGer) : (CONJ ? hipblasGer : hipblasGer); int M = arg.M; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t x_size = size_t(M) * abs_incx; size_t y_size = size_t(M) * abs_incy; size_t A_size = size_t(lda) * N; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || !incx || !incy || lda < M || lda < 1; if(invalid_size || !M || !N) { hipblasStatus_t actual = hipblasGerFn(handle, M, N, nullptr, nullptr, incx, nullptr, incy, nullptr, lda); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hA_cpu(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix(hA, arg, lda, N, lda, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan); // copy matrix is easy in STL; hB = hA: save a copy in hB which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGerFn(handle, M, N, (T*)&h_alpha, dx, incx, dy, incy, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGerFn(handle, M, N, d_alpha, dx, incx, dy, incy, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_ger(M, N, h_alpha, hx.data(), incx, hy.data(), incy, hA_cpu.data(), lda); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, lda, hA_cpu.data(), hA_host.data()); unit_check_general(M, N, lda, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, lda, hA_cpu.data(), hA_host.data()); hipblas_error_device = norm_check_general('F', M, N, lda, hA_cpu.data(), hA_device.data()); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGerFn(handle, M, N, d_alpha, dx, incx, dy, incy, dA, lda)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGerModel{}.log_args(std::cout, arg, gpu_time_used, ger_gflop_count(M, N), ger_gbyte_count(M, N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_ger_batched.hpp000066400000000000000000000211771434647641600235120ustar00rootroot00000000000000/* 
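// For reference, (c)ger performs the rank-1 update A := alpha * x * y^T + A (gerc uses y^H),
// which is what the cblas_ger call above validates against. A minimal column-major sketch of the
// real-typed update, assuming positive increments; this is an illustrative reference loop, not
// the BLAS implementation the test links against:
template <typename T>
void ger_reference(int M, int N, T alpha, const T* x, int incx, const T* y, int incy, T* A, int lda)
{
    for(int j = 0; j < N; j++)
    {
        const T tmp = alpha * y[j * incy]; // scale one element of y
        for(int i = 0; i < M; i++)
            A[i + j * size_t(lda)] += x[i * incx] * tmp; // update column j of A
    }
}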
************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGerBatchedModel = ArgumentModel; inline void testname_ger_batched(const Arguments& arg, std::string& name) { hipblasGerBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_ger_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGerBatchedFn = FORTRAN ? (CONJ ? hipblasGerBatched : hipblasGerBatched) : (CONJ ? hipblasGerBatched : hipblasGerBatched); int M = arg.M; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; int batch_count = arg.batch_count; size_t A_size = size_t(lda) * N; double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || !incx || !incy || lda < M || lda < 1 || batch_count < 0; if(invalid_size || !M || !N || !batch_count) { hipblasStatus_t actual = hipblasGerBatchedFn( handle, M, N, nullptr, nullptr, incx, nullptr, incy, nullptr, lda, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hy(N, incy, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(M, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan); hA_cpu.copy_from(hA); hA_host.copy_from(hA); hA_device.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGerBatchedFn(handle, M, N, (T*)&h_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGerBatchedFn(handle, M, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_ger(M, N, h_alpha, hx[b], incx, hy[b], incy, hA_cpu[b], lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, lda, hA_cpu, hA_host); unit_check_general(M, N, batch_count, lda, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, lda, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, lda, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGerBatchedFn(handle, M, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGerBatchedModel{}.log_args(std::cout, arg, gpu_time_used, ger_gflop_count(M, N), ger_gbyte_count(M, N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_ger_strided_batched.hpp000066400000000000000000000267471434647641600252400ustar00rootroot00000000000000/* 
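// The log_args calls above report performance through ger_gflop_count and ger_gbyte_count, which
// live elsewhere in the client code. The sketch below shows the conventional accounting for a
// rank-1 update, 2*M*N flops (one multiply and one add per element of A) and one read plus one
// write of A plus the two vectors; it is an assumption about what those helpers compute, written
// under hypothetical names, not a copy of them:
template <typename T>
constexpr double ger_gflops(int M, int N)
{
    return (2.0 * M * N) / 1e9; // multiply-add per element of A
}

template <typename T>
constexpr double ger_gbytes(int M, int N)
{
    return (sizeof(T) * (2.0 * M * N + M + N)) / 1e9; // read and write A, read x and y once
}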
************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasGerStridedBatchedModel = ArgumentModel; inline void testname_ger_strided_batched(const Arguments& arg, std::string& name) { hipblasGerStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_ger_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasGerStridedBatchedFn = FORTRAN ? (CONJ ? hipblasGerStridedBatched : hipblasGerStridedBatched) : (CONJ ? hipblasGerStridedBatched : hipblasGerStridedBatched); int M = arg.M; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stride_A = size_t(lda) * N * stride_scale; hipblasStride stride_x = size_t(M) * abs_incx * stride_scale; hipblasStride stride_y = size_t(N) * abs_incy * stride_scale; size_t A_size = stride_A * batch_count; size_t x_size = stride_x * batch_count; size_t y_size = stride_y * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || N < 0 || !incx || !incy || lda < M || lda < 1 || batch_count < 0; if(invalid_size || !M || !N || !batch_count) { hipblasStatus_t actual = hipblasGerStridedBatchedFn(handle, M, N, nullptr, nullptr, incx, stride_x, nullptr, incy, stride_y, nullptr, lda, stride_A, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix( hA, arg, M, N, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, M, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, stride_y, batch_count, hipblas_client_alpha_sets_nan); // copy matrix is easy in STL; hB = hA: save a copy in hB which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasGerStridedBatchedFn(handle, M, N, (T*)&h_alpha, dx, incx, stride_x, dy, incy, stride_y, dA, lda, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasGerStridedBatchedFn(handle, M, N, d_alpha, dx, incx, stride_x, dy, incy, stride_y, dA, lda, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_ger(M, N, h_alpha, hx.data() + b * stride_x, incx, hy.data() + b * stride_y, incy, hA_cpu.data() + b * stride_A, lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, lda, stride_A, hA_cpu.data(), hA_host.data()); unit_check_general( M, N, batch_count, lda, stride_A, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', M, N, lda, stride_A, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', M, N, lda, stride_A, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * lda * N, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGerStridedBatchedFn(handle, M, N, d_alpha, dx, incx, stride_x, dy, incy, stride_y, dA, lda, stride_A, 
batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGerStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, ger_gflop_count(M, N), ger_gbyte_count(M, N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrf.hpp000066400000000000000000000121331434647641600223620ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetrfModel = ArgumentModel; inline void testname_getrf(const Arguments& arg, std::string& name) { hipblasGetrfModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_getrf(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrfFn = FORTRAN ? hipblasGetrf : hipblasGetrf; int M = arg.N; int N = arg.N; int lda = arg.lda; size_t A_size = size_t(lda) * N; int Ipiv_size = std::min(M, N); // Check to prevent memory allocation error if(M < 0 || N < 0 || lda < M) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA1(A_size); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); host_vector hInfo(1); host_vector hInfo1(1); device_vector dA(A_size); device_vector dIpiv(Ipiv_size); device_vector dInfo(1); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA on CPU srand(1); hipblas_init(hA, M, N, lda); // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[i + j * lda] += 400; else hA[i + j * lda] -= 4; } } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemset(dIpiv, 0, Ipiv_size * sizeof(int))); CHECK_HIP_ERROR(hipMemset(dInfo, 0, sizeof(int))); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetrfFn(handle, N, dA, lda, dIpiv, dInfo)); // Copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hA1, dA, A_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hIpiv1, dIpiv, Ipiv_size * sizeof(int), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hInfo1, dInfo, sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ hInfo[0] = cblas_getrf(M, N, hA.data(), lda, hIpiv.data()); hipblas_error = norm_check_general('F', M, N, lda, hA, hA1); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrfFn(handle, N, dA, lda, dIpiv, dInfo)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetrfModel{}.log_args(std::cout, arg, gpu_time_used, getrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrf_batched.hpp000066400000000000000000000133661434647641600240450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
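// The getrf tests above nudge the random input toward strict diagonal dominance (+400 on the
// diagonal, -4 elsewhere) so the LU factorization cannot hit a singular pivot and the reference
// info stays 0. The same conditioning step, written out on its own for clarity; hipblas_init has
// already filled A with values of modest magnitude before this runs:
template <typename T>
void make_diagonally_dominant(T* A, int M, int N, int lda)
{
    for(int i = 0; i < M; i++)
        for(int j = 0; j < N; j++)
        {
            if(i == j)
                A[i + j * size_t(lda)] += 400; // boost the diagonal
            else
                A[i + j * size_t(lda)] -= 4;   // keep off-diagonal entries small
        }
}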
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetrfBatchedModel = ArgumentModel; inline void testname_getrf_batched(const Arguments& arg, std::string& name) { hipblasGetrfBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_getrf_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrfBatchedFn = FORTRAN ? hipblasGetrfBatched : hipblasGetrfBatched; int M = arg.N; int N = arg.N; int lda = arg.lda; int batch_count = arg.batch_count; hipblasStride strideP = std::min(M, N); size_t A_size = size_t(lda) * N; size_t Ipiv_size = strideP * batch_count; // Check to prevent memory allocation error if(M < 0 || N < 0 || lda < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA1(A_size, 1, batch_count); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); host_vector hInfo(batch_count); host_vector hInfo1(batch_count); device_batch_vector dA(A_size, 1, batch_count); device_vector dIpiv(Ipiv_size); device_vector dInfo(batch_count); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init(hA, true); for(int b = 0; b < batch_count; b++) { // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(hipMemset(dIpiv, 0, Ipiv_size * sizeof(int))); CHECK_HIP_ERROR(hipMemset(dInfo, 0, batch_count * sizeof(int))); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR( hipblasGetrfBatchedFn(handle, N, dA.ptr_on_device(), lda, dIpiv, dInfo, batch_count)); // Copy output from device to CPU CHECK_HIP_ERROR(hA1.transfer_from(dA)); CHECK_HIP_ERROR( hipMemcpy(hIpiv1.data(), dIpiv, Ipiv_size * sizeof(int), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hInfo1.data(), dInfo, batch_count * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ for(int b = 0; b < batch_count; b++) { hInfo[b] = cblas_getrf(M, N, hA[b], lda, hIpiv.data() + b * strideP); } hipblas_error = norm_check_general('F', M, N, lda, hA, hA1, batch_count); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrfBatchedFn( handle, N, dA.ptr_on_device(), lda, dIpiv, dInfo, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
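// Timing note: the loop above runs arg.cold_iters warm-up calls before the clock starts (the
// timer is read when iter == arg.cold_iters), so gpu_time_used covers only the arg.iters hot
// iterations, synchronized on the test's stream. A small derived metric, assuming
// get_time_us_sync reports microseconds as its name suggests; avg_time_us is a local
// illustration only, the actual reporting is done by log_args below:
const double avg_time_us = arg.iters > 0 ? gpu_time_used / arg.iters : 0.0; // microseconds per call
(void)avg_time_us; // unused here; log_args derives GFLOP/s from the total time and flop count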
hipblasGetrfBatchedModel{}.log_args(std::cout, arg, gpu_time_used, getrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrf_npvt.hpp000066400000000000000000000116471434647641600234420ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetrfNpvtModel = ArgumentModel; inline void testname_getrf_npvt(const Arguments& arg, std::string& name) { hipblasGetrfNpvtModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_getrf_npvt(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrfFn = FORTRAN ? hipblasGetrf : hipblasGetrf; int M = arg.N; int N = arg.N; int lda = arg.lda; size_t A_size = size_t(lda) * N; size_t Ipiv_size = std::min(M, N); // Check to prevent memory allocation error if(M < 0 || N < 0 || lda < M) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA1(A_size); host_vector hIpiv(Ipiv_size); host_vector hInfo(1); host_vector hInfo1(1); device_vector dA(A_size); device_vector dInfo(1); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA on CPU srand(1); hipblas_init(hA, M, N, lda); // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[i + j * lda] += 400; else hA[i + j * lda] -= 4; } } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemset(dInfo, 0, sizeof(int))); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetrfFn(handle, N, dA, lda, nullptr, dInfo)); // Copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hA1.data(), dA, A_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hInfo1.data(), dInfo, sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ hInfo[0] = cblas_getrf(M, N, hA.data(), lda, hIpiv.data()); hipblas_error = norm_check_general('F', M, N, lda, hA, hA1); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrfFn(handle, N, dA, lda, nullptr, dInfo)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetrfNpvtModel{}.log_args(std::cout, arg, gpu_time_used, getrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrf_npvt_batched.hpp000066400000000000000000000130211434647641600251000ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
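// The "_npvt" tests above exercise LU factorization without partial pivoting by passing a null
// pivot array to the same getrf entry point; only the device info value then reports success or
// the index of a zero pivot. The call shape, using the templated hipblasGetrf<T, false> wrapper
// (the non-Fortran mapping this harness dispatches through); the helper name is illustrative and
// the nullptr-selects-no-pivoting behavior is the one this test relies on from the backend:
template <typename T>
hipblasStatus_t getrf_no_pivoting(hipblasHandle_t handle, int N, T* dA, int lda, int* dInfo)
{
    // nullptr in the ipiv position selects the no-pivoting path
    return hipblasGetrf<T, false>(handle, N, dA, lda, /*ipiv=*/nullptr, dInfo);
}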
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetrfNpvtBatchedModel = ArgumentModel; inline void testname_getrf_npvt_batched(const Arguments& arg, std::string& name) { hipblasGetrfNpvtBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_getrf_npvt_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrfBatchedFn = FORTRAN ? hipblasGetrfBatched : hipblasGetrfBatched; int M = arg.N; int N = arg.N; int lda = arg.lda; int batch_count = arg.batch_count; hipblasStride strideP = std::min(M, N); size_t A_size = size_t(lda) * N; size_t Ipiv_size = strideP * batch_count; // Check to prevent memory allocation error if(M < 0 || N < 0 || lda < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA1(A_size, 1, batch_count); host_vector hIpiv(Ipiv_size); host_vector hInfo(batch_count); host_vector hInfo1(batch_count); device_batch_vector dA(A_size, 1, batch_count); device_vector dInfo(batch_count); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init(hA, true); for(int b = 0; b < batch_count; b++) { // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(hipMemset(dInfo, 0, batch_count * sizeof(int))); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR( hipblasGetrfBatchedFn(handle, N, dA.ptr_on_device(), lda, nullptr, dInfo, batch_count)); // Copy output from device to CPU CHECK_HIP_ERROR(hA1.transfer_from(dA)); CHECK_HIP_ERROR( hipMemcpy(hInfo1.data(), dInfo, batch_count * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ for(int b = 0; b < batch_count; b++) { hInfo[b] = cblas_getrf(M, N, hA[b], lda, hIpiv.data() + b * strideP); } hipblas_error = norm_check_general('F', M, N, lda, hA, hA1, batch_count); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrfBatchedFn( handle, N, dA.ptr_on_device(), lda, nullptr, dInfo, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetrfNpvtBatchedModel{}.log_args(std::cout, arg, gpu_time_used, getrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrf_npvt_strided_batched.hpp000066400000000000000000000137001434647641600266220ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
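// Strided-batched tests lay every problem out in one contiguous allocation: matrix b starts at
// A + b * strideA and its pivots at ipiv + b * strideP, with strideA derived from lda * N and
// strideP from min(M, N), both multiplied by arg.stride_scale above (a scale greater than 1
// leaves deliberate gaps between consecutive problems). Indexing sketch using the same
// conventions as the surrounding tests; the helper names are illustrative:
template <typename T>
T* matrix_of_batch(T* A, int b, hipblasStride strideA)
{
    return A + size_t(b) * strideA; // first element of the b-th matrix
}

inline int* pivots_of_batch(int* ipiv, int b, hipblasStride strideP)
{
    return ipiv + size_t(b) * strideP; // first pivot of the b-th matrix
}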
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetrfNpvtStridedBatchedModel = ArgumentModel; inline void testname_getrf_npvt_strided_batched(const Arguments& arg, std::string& name) { hipblasGetrfNpvtStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_getrf_npvt_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrfStridedBatchedFn = FORTRAN ? hipblasGetrfStridedBatched : hipblasGetrfStridedBatched; int M = arg.N; int N = arg.N; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride strideA = size_t(lda) * N * stride_scale; hipblasStride strideP = size_t(std::min(M, N)) * stride_scale; size_t A_size = strideA * batch_count; size_t Ipiv_size = strideP * batch_count; // Check to prevent memory allocation error if(M < 0 || N < 0 || lda < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA1(A_size); host_vector hIpiv(Ipiv_size); host_vector hInfo(batch_count); host_vector hInfo1(batch_count); device_vector dA(A_size); device_vector dInfo(batch_count); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA on CPU srand(1); for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; hipblas_init(hAb, M, N, lda); // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hAb[i + j * lda] += 400; else hAb[i + j * lda] -= 4; } } } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemset(dInfo, 0, batch_count * sizeof(int))); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetrfStridedBatchedFn( handle, N, dA, lda, strideA, nullptr, strideP, dInfo, batch_count)); // Copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hA1.data(), dA, A_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hInfo1.data(), dInfo, batch_count * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ for(int b = 0; b < batch_count; b++) { hInfo[b] = cblas_getrf(M, N, hA.data() + b * strideA, lda, hIpiv.data() + b * strideP); } hipblas_error = norm_check_general('F', M, N, lda, strideA, hA, hA1, batch_count); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrfStridedBatchedFn( handle, N, dA, lda, strideA, nullptr, strideP, dInfo, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetrfNpvtStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, getrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrf_strided_batched.hpp000066400000000000000000000142211434647641600255520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
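// Factorization results are compared against the LAPACK reference through a Frobenius-norm error
// bounded by a scaled machine epsilon rather than bit-exact equality, since pivoting order and
// rounding can differ legitimately between backends. The check used throughout these tests,
// collected into one hypothetical helper for clarity; the factor 2000 is the harness's empirical
// tolerance choice, and real_t and unit_check_error come from the harness itself:
template <typename T>
void check_factorization_error(double hipblas_error)
{
    using U          = real_t<T>;                       // real type backing T
    U      eps       = std::numeric_limits<U>::epsilon();
    double tolerance = eps * 2000;                      // scale epsilon to allow rounding noise
    unit_check_error(hipblas_error, tolerance);         // fails the test on violation
}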
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetrfStridedBatchedModel = ArgumentModel; inline void testname_getrf_strided_batched(const Arguments& arg, std::string& name) { hipblasGetrfStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_getrf_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrfStridedBatchedFn = FORTRAN ? hipblasGetrfStridedBatched : hipblasGetrfStridedBatched; int M = arg.N; int N = arg.N; int lda = arg.lda; int batch_count = arg.batch_count; double stride_scale = arg.stride_scale; hipblasStride strideA = size_t(lda) * N * stride_scale; hipblasStride strideP = std::min(M, N) * stride_scale; size_t A_size = strideA * batch_count; size_t Ipiv_size = strideP * batch_count; // Check to prevent memory allocation error if(M < 0 || N < 0 || lda < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hA1(A_size); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); host_vector hInfo(batch_count); host_vector hInfo1(batch_count); device_vector dA(A_size); device_vector dIpiv(Ipiv_size); device_vector dInfo(batch_count); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA on CPU srand(1); for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; hipblas_init(hAb, M, N, lda); // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hAb[i + j * lda] += 400; else hAb[i + j * lda] -= 4; } } } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemset(dIpiv, 0, Ipiv_size * sizeof(int))); CHECK_HIP_ERROR(hipMemset(dInfo, 0, batch_count * sizeof(int))); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetrfStridedBatchedFn( handle, N, dA, lda, strideA, dIpiv, strideP, dInfo, batch_count)); // Copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hA1.data(), dA, A_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hIpiv1.data(), dIpiv, Ipiv_size * sizeof(int), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hInfo1.data(), dInfo, batch_count * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ for(int b = 0; b < batch_count; b++) { hInfo[b] = cblas_getrf(M, N, hA.data() + b * strideA, lda, hIpiv.data() + b * strideP); } hipblas_error = norm_check_general('F', M, N, lda, strideA, hA, hA1, batch_count); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters 
+ arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrfStridedBatchedFn( handle, N, dA, lda, strideA, dIpiv, strideP, dInfo, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetrfStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, getrf_gflop_count(N, M), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getri_batched.hpp000066400000000000000000000163131434647641600240430ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetriBatchedModel = ArgumentModel; inline void testname_getri_batched(const Arguments& arg, std::string& name) { hipblasGetriBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_getri_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetriBatchedFn = FORTRAN ? hipblasGetriBatched : hipblasGetriBatched; int M = arg.N; int N = arg.N; int lda = arg.lda; int batch_count = arg.batch_count; hipblasStride strideP = std::min(M, N); size_t A_size = size_t(lda) * N; size_t Ipiv_size = strideP * batch_count; // Check to prevent memory allocation error if(M < 0 || N < 0 || lda < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA1(A_size, 1, batch_count); host_batch_vector hC(A_size, 1, batch_count); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); host_vector hInfo(batch_count); host_vector hInfo1(batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dC(A_size, 1, batch_count); device_vector dIpiv(Ipiv_size); device_vector dInfo(batch_count); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init(hA, true); for(int b = 0; b < batch_count; b++) { // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } // perform LU factorization on A int* hIpivb = hIpiv.data() + b * strideP; hInfo[b] = cblas_getrf(M, N, hA[b], lda, hIpivb); } CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dC.transfer_from(hC)); CHECK_HIP_ERROR(hipMemcpy(dIpiv, hIpiv, Ipiv_size * sizeof(int), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemset(dInfo, 0, batch_count * sizeof(int))); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetriBatchedFn(handle, N, dA.ptr_on_device(), lda, dIpiv, dC.ptr_on_device(), lda, dInfo, batch_count)); // Copy output from device to CPU CHECK_HIP_ERROR(hA1.transfer_from(dC)); CHECK_HIP_ERROR( hipMemcpy(hIpiv1.data(), dIpiv, Ipiv_size * sizeof(int), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hInfo1.data(), dInfo, batch_count * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ for(int b = 0; b < batch_count; b++) { // Workspace query host_vector work(1); cblas_getri(N, hA[b], lda, hIpiv.data() + b * strideP, work.data(), -1); int lwork = type2int(work[0]); // Perform inversion work = host_vector(lwork); hInfo[b] = cblas_getri(N, hA[b], lda, hIpiv.data() + b * strideP, work.data(), lwork); hipblas_error = norm_check_general('F', M, N, lda, hA[b], hA1[b]); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(hipblas_error, tolerance); } } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetriBatchedFn(handle, N, dA.ptr_on_device(), lda, dIpiv, dC.ptr_on_device(), lda, dInfo, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetriBatchedModel{}.log_args(std::cout, arg, gpu_time_used, getri_gflop_count(N), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getri_npvt_batched.hpp000066400000000000000000000156541434647641600251210ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetriNpvtBatchedModel = ArgumentModel; inline void testname_getri_npvt_batched(const Arguments& arg, std::string& name) { hipblasGetriNpvtBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_getri_npvt_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetriBatchedFn = FORTRAN ? hipblasGetriBatched : hipblasGetriBatched; int M = arg.N; int N = arg.N; int lda = arg.lda; int batch_count = arg.batch_count; hipblasStride strideP = std::min(M, N); size_t A_size = size_t(lda) * N; size_t Ipiv_size = strideP * batch_count; // Check to prevent memory allocation error if(M < 0 || N < 0 || lda < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA1(A_size, 1, batch_count); host_batch_vector hC(A_size, 1, batch_count); host_vector hIpiv(Ipiv_size); host_vector hInfo(batch_count); host_vector hInfo1(batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dC(A_size, 1, batch_count); device_vector dInfo(batch_count); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init(hA, true); for(int b = 0; b < batch_count; b++) { // scale A to avoid singularities for(int i = 0; i < M; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } // perform LU factorization on A int* hIpivb = hIpiv.data() + b * strideP; hInfo[b] = cblas_getrf(M, N, hA[b], lda, hIpivb); } CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dC.transfer_from(hC)); CHECK_HIP_ERROR(hipMemset(dInfo, 0, batch_count * sizeof(int))); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetriBatchedFn(handle, N, dA.ptr_on_device(), lda, nullptr, dC.ptr_on_device(), lda, dInfo, batch_count)); // Copy output from device to CPU CHECK_HIP_ERROR(hA1.transfer_from(dC)); CHECK_HIP_ERROR( hipMemcpy(hInfo1.data(), dInfo, batch_count * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ for(int b = 0; b < batch_count; b++) { // Workspace query host_vector work(1); cblas_getri(N, hA[b], lda, hIpiv.data() + b * strideP, work.data(), -1); int lwork = type2int(work[0]); // Perform inversion work = host_vector(lwork); hInfo[b] = cblas_getri(N, hA[b], lda, hIpiv.data() + b * strideP, work.data(), lwork); } hipblas_error = norm_check_general('F', M, N, lda, hA, hA1, batch_count); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = eps * 2000; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetriBatchedFn(handle, N, dA.ptr_on_device(), lda, nullptr, dC.ptr_on_device(), lda, dInfo, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetriNpvtBatchedModel{}.log_args(std::cout, arg, gpu_time_used, getri_gflop_count(N), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrs.hpp000066400000000000000000000232561434647641600224070ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "gtest/gtest.h" #include #include #include #include #include "testing_common.hpp" using hipblasGetrsModel = ArgumentModel; inline void testname_getrs(const Arguments& arg, std::string& name) { hipblasGetrsModel{}.test_name(arg, name); } template inline hipblasStatus_t setup_getrs_testing(host_vector& hA, host_vector& hB, host_vector& hX, host_vector& hIpiv, device_vector& dA, device_vector& dB, device_vector& dIpiv, int N, int lda, int ldb) { const size_t A_size = size_t(N) * lda; const size_t B_size = ldb; const size_t Ipiv_size = N; // Initial hA, hB, hX on CPU srand(1); hipblas_init(hA, N, N, lda); hipblas_init(hX, N, 1, ldb); // scale A to avoid singularities for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[i + j * lda] += 400; else hA[i + j * lda] -= 4; } } // Calculate hB = hA*hX; hipblasOperation_t opN = HIPBLAS_OP_N; cblas_gemm(opN, opN, N, 1, N, (T)1, hA.data(), lda, hX.data(), ldb, (T)0, hB.data(), ldb); // LU factorize hA on the CPU int info = cblas_getrf(N, N, hA.data(), lda, hIpiv.data()); if(info != 0) { std::cerr << "LU decomposition failed" << std::endl; return HIPBLAS_STATUS_INTERNAL_ERROR; } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, B_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dIpiv, hIpiv, Ipiv_size * sizeof(int), hipMemcpyHostToDevice)); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_getrs_bad_arg(const Arguments& arg) { auto hipblasGetrsFn = arg.fortran ? 
hipblasGetrs : hipblasGetrs; hipblasLocalHandle handle(arg); const int N = 100; const int nrhs = 1; const int lda = 101; const int ldb = 102; const size_t A_size = size_t(N) * lda; const size_t B_size = ldb; const int Ipiv_size = N; const hipblasOperation_t op = HIPBLAS_OP_N; host_vector hA(A_size); host_vector hB(B_size); host_vector hX(B_size); host_vector hIpiv(Ipiv_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dIpiv(Ipiv_size); int info = 0; // Need initialization code because even with bad params we call roc/cu-solver // so want to give reasonable data EXPECT_HIPBLAS_STATUS(setup_getrs_testing(hA, hB, hX, hIpiv, dA, dB, dIpiv, N, lda, ldb), HIPBLAS_STATUS_SUCCESS); EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, N, nrhs, dA, lda, dIpiv, dB, ldb, nullptr), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, -1, nrhs, dA, lda, dIpiv, dB, ldb, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, N, -1, dA, lda, dIpiv, dB, ldb, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, N, nrhs, nullptr, lda, dIpiv, dB, ldb, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, N, nrhs, dA, N - 1, dIpiv, dB, ldb, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-5, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, N, nrhs, dA, lda, nullptr, dB, ldb, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-6, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, N, nrhs, dA, lda, dIpiv, nullptr, ldb, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-7, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, N, nrhs, dA, lda, dIpiv, dB, N - 1, &info), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-8, info); // If N == 0, A, B, and ipiv can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGetrsFn(handle, op, 0, nrhs, nullptr, lda, nullptr, nullptr, ldb, &info), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // if nrhs == 0, B can be nullptr EXPECT_HIPBLAS_STATUS(hipblasGetrsFn(handle, op, N, 0, dA, lda, dIpiv, nullptr, ldb, &info), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_getrs(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrsFn = FORTRAN ? hipblasGetrs : hipblasGetrs; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; size_t A_size = size_t(lda) * N; size_t B_size = ldb * 1; size_t Ipiv_size = N; // Check to prevent memory allocation error if(N < 0 || lda < N || ldb < N) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hX(B_size); host_vector hB(B_size); host_vector hB1(B_size); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); int info; device_vector dA(A_size); device_vector dB(B_size); device_vector dIpiv(Ipiv_size); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); hipblasOperation_t op = HIPBLAS_OP_N; EXPECT_HIPBLAS_STATUS(setup_getrs_testing(hA, hB, hX, hIpiv, dA, dB, dIpiv, N, lda, ldb), HIPBLAS_STATUS_SUCCESS); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetrsFn(handle, op, N, 1, dA, lda, dIpiv, dB, ldb, &info)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hB1, dB, B_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hIpiv1, dIpiv, Ipiv_size * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ cblas_getrs('N', N, 1, hA.data(), lda, hIpiv.data(), hB.data(), ldb); hipblas_error = norm_check_general('F', N, 1, ldb, hB.data(), hB1.data()); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = N * eps * 100; int zero = 0; unit_check_error(hipblas_error, tolerance); unit_check_general(1, 1, 1, &zero, &info); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrsFn(handle, op, N, 1, dA, lda, dIpiv, dB, ldb, &info)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetrsModel{}.log_args(std::cout, arg, gpu_time_used, getrs_gflop_count(N, 1), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrs_batched.hpp000066400000000000000000000310341434647641600240520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetrsBatchedModel = ArgumentModel; inline void testname_getrs_batched(const Arguments& arg, std::string& name) { hipblasGetrsBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t setup_getrs_batched_testing(host_batch_vector& hA, host_batch_vector& hB, host_batch_vector& hX, host_vector& hIpiv, device_batch_vector& dA, device_batch_vector& dB, device_vector& dIpiv, int N, int lda, int ldb, int batch_count) { hipblasStride strideP = N; size_t A_size = size_t(lda) * N; size_t B_size = size_t(ldb) * 1; size_t Ipiv_size = strideP * batch_count; // Initial hA, hB, hX on CPU hipblas_init(hA, true); hipblas_init(hX); srand(1); hipblasOperation_t op = HIPBLAS_OP_N; for(int b = 0; b < batch_count; b++) { // scale A to avoid singularities for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } // Calculate hB = hA*hX; cblas_gemm(op, op, N, 1, N, (T)1, hA[b], lda, hX[b], ldb, (T)0, hB[b], ldb); // LU factorize hA on the CPU int info = cblas_getrf(N, N, hA[b], lda, hIpiv.data() + b * strideP); if(info != 0) { std::cerr << "LU decomposition failed" << std::endl; return HIPBLAS_STATUS_INTERNAL_ERROR; } } CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(hipMemcpy(dIpiv, hIpiv.data(), Ipiv_size * sizeof(int), hipMemcpyHostToDevice)); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_getrs_batched_bad_arg(const Arguments& arg) { auto hipblasGetrsBatchedFn = arg.fortran ? hipblasGetrsBatched : hipblasGetrsBatched; hipblasLocalHandle handle(arg); const int N = 100; const int nrhs = 1; const int lda = 101; const int ldb = 102; const int batch_count = 2; const size_t A_size = size_t(N) * lda; const size_t B_size = ldb; const size_t Ipiv_size = size_t(N) * batch_count; const hipblasOperation_t op = HIPBLAS_OP_N; host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hX(B_size, 1, batch_count); host_vector hIpiv(Ipiv_size); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_vector dIpiv(Ipiv_size); int info = 0; T* const* dAp = dA.ptr_on_device(); T* const* dBp = dB.ptr_on_device(); // Need initialization code because even with bad params we call roc/cu-solver // so want to give reasonable data EXPECT_HIPBLAS_STATUS( setup_getrs_batched_testing(hA, hB, hX, hIpiv, dA, dB, dIpiv, N, lda, ldb, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn(handle, op, -1, nrhs, dAp, lda, dIpiv, dBp, ldb, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn(handle, op, N, -1, dAp, lda, dIpiv, dBp, ldb, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn(handle, op, N, nrhs, dAp, N - 1, dIpiv, dBp, ldb, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-5, info); EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn(handle, op, N, nrhs, dAp, lda, dIpiv, dBp, N - 1, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-8, info); // cuBLAS returns HIPBLAS_STATUS_EXECUTION_FAILED and gives info == 0 #ifndef __HIP_PLATFORM_NVCC__ EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn(handle, op, N, nrhs, dAp, lda, dIpiv, dBp, ldb, &info, -1), 
HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-10, info); #endif // If N == 0, A, B, and ipiv can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn( handle, op, 0, nrhs, nullptr, lda, nullptr, nullptr, ldb, &info, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // if nrhs == 0, B can be nullptr EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn(handle, op, N, 0, dAp, lda, dIpiv, nullptr, ldb, &info, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // can't make any assumptions about ptrs when batch_count < 0, this is handled by rocSOLVER // cuBLAS beckend doesn't check for nullptrs, including info, hipBLAS/rocSOLVER does #ifndef __HIP_PLATFORM_NVCC__ EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn(handle, op, N, nrhs, dAp, lda, dIpiv, dBp, ldb, nullptr, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn( handle, op, N, nrhs, nullptr, lda, dIpiv, dBp, ldb, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn(handle, op, N, nrhs, dAp, lda, nullptr, dBp, ldb, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-6, info); EXPECT_HIPBLAS_STATUS( hipblasGetrsBatchedFn( handle, op, N, nrhs, dAp, lda, dIpiv, nullptr, ldb, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-7, info); #endif return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_getrs_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrsBatchedFn = FORTRAN ? hipblasGetrsBatched : hipblasGetrsBatched; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int batch_count = arg.batch_count; hipblasStride strideP = N; size_t A_size = size_t(lda) * N; size_t B_size = size_t(ldb) * 1; size_t Ipiv_size = strideP * batch_count; // Check to prevent memory allocation error if(N < 0 || lda < N || ldb < N || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hX(B_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hB1(B_size, 1, batch_count); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); int info; device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_vector dIpiv(Ipiv_size); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); hipblasOperation_t op = HIPBLAS_OP_N; EXPECT_HIPBLAS_STATUS( setup_getrs_batched_testing(hA, hB, hX, hIpiv, dA, dB, dIpiv, N, lda, ldb, batch_count), HIPBLAS_STATUS_SUCCESS); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetrsBatchedFn(handle, op, N, 1, dA.ptr_on_device(), lda, dIpiv, dB.ptr_on_device(), ldb, &info, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hB1.transfer_from(dB)); CHECK_HIP_ERROR( hipMemcpy(hIpiv1.data(), dIpiv, Ipiv_size * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_getrs('N', N, 1, hA[b], lda, hIpiv.data() + b * strideP, hB[b], ldb); } hipblas_error = norm_check_general('F', N, 1, ldb, hB, hB1, batch_count); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = N * eps * 100; int zero = 0; unit_check_error(hipblas_error, tolerance); unit_check_general(1, 1, 1, &zero, &info); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrsBatchedFn(handle, op, N, 1, dA.ptr_on_device(), lda, dIpiv, dB.ptr_on_device(), ldb, &info, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetrsBatchedModel{}.log_args(std::cout, arg, gpu_time_used, getrs_gflop_count(N, 1), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_getrs_strided_batched.hpp000066400000000000000000000524711434647641600256000ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" using hipblasGetrsStridedBatchedModel = ArgumentModel; inline void testname_getrs_strided_batched(const Arguments& arg, std::string& name) { hipblasGetrsStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t setup_getrs_strided_batched_testing(host_vector& hA, host_vector& hB, host_vector& hX, host_vector& hIpiv, device_vector& dA, device_vector& dB, device_vector& dIpiv, int N, int lda, int ldb, hipblasStride strideA, hipblasStride strideB, hipblasStride strideP, int batch_count) { size_t A_size = strideA * batch_count; size_t B_size = strideB * batch_count; size_t Ipiv_size = strideP * batch_count; // Initial hA, hB, hX on CPU srand(1); hipblasOperation_t op = HIPBLAS_OP_N; for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; T* hXb = hX.data() + b * strideB; T* hBb = hB.data() + b * strideB; int* hIpivb = hIpiv.data() + b * strideP; hipblas_init(hAb, N, N, lda); hipblas_init(hXb, N, 1, ldb); // scale A to avoid singularities for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { if(i == j) hAb[i + j * lda] += 400; else hAb[i + j * lda] -= 4; } } // Calculate hB = hA*hX; cblas_gemm(op, op, N, 1, N, (T)1, hAb, lda, hXb, ldb, (T)0, hBb, ldb); // LU factorize hA on the CPU int info = cblas_getrf(N, N, hAb, lda, hIpivb); if(info != 0) { std::cerr << "LU decomposition failed" << std::endl; return HIPBLAS_STATUS_INTERNAL_ERROR; } } // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, A_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, B_size * sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dIpiv, hIpiv, Ipiv_size * sizeof(int), hipMemcpyHostToDevice)); return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_getrs_strided_batched_bad_arg(const Arguments& arg) { auto hipblasGetrsStridedBatchedFn = arg.fortran ? 
hipblasGetrsStridedBatched : hipblasGetrsStridedBatched; hipblasLocalHandle handle(arg); const int N = 100; const int nrhs = 1; const int lda = 101; const int ldb = 102; const int batch_count = 2; hipblasStride strideA = size_t(lda) * N; hipblasStride strideB = size_t(ldb) * 1; hipblasStride strideP = size_t(N); size_t A_size = strideA * batch_count; size_t B_size = strideB * batch_count; size_t Ipiv_size = strideP * batch_count; const hipblasOperation_t op = HIPBLAS_OP_N; host_vector hA(A_size); host_vector hB(B_size); host_vector hX(B_size); host_vector hIpiv(Ipiv_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dIpiv(Ipiv_size); int info = 0; // Need initialization code because even with bad params we call roc/cu-solver // so want to give reasonable data EXPECT_HIPBLAS_STATUS( setup_getrs_strided_batched_testing( hA, hB, hX, hIpiv, dA, dB, dIpiv, N, lda, ldb, strideA, strideB, strideP, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, N, nrhs, dA, lda, strideA, dIpiv, strideP, dB, ldb, strideB, nullptr, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, -1, nrhs, dA, lda, strideA, dIpiv, strideP, dB, ldb, strideB, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-2, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, N, -1, dA, lda, strideA, dIpiv, strideP, dB, ldb, strideB, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-3, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, N, nrhs, nullptr, lda, strideA, dIpiv, strideP, dB, ldb, strideB, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-4, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, N, nrhs, dA, N - 1, strideA, dIpiv, strideP, dB, ldb, strideB, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-5, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, N, nrhs, dA, lda, strideA, nullptr, strideP, dB, ldb, strideB, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-7, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, N, nrhs, dA, lda, strideA, dIpiv, strideP, nullptr, ldb, strideB, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-9, info); EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, N, nrhs, dA, lda, strideA, dIpiv, strideP, dB, N - 1, strideB, &info, batch_count), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-10, info); EXPECT_HIPBLAS_STATUS( hipblasGetrsStridedBatchedFn( handle, op, N, nrhs, dA, lda, strideA, dIpiv, strideP, dB, ldb, strideB, &info, -1), HIPBLAS_STATUS_INVALID_VALUE); EXPECT_EQ(-13, info); // If N == 0, A, B, and ipiv can be nullptr EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, 0, nrhs, nullptr, lda, strideA, nullptr, strideP, nullptr, ldb, strideB, &info, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // if nrhs == 0, B can be nullptr EXPECT_HIPBLAS_STATUS(hipblasGetrsStridedBatchedFn(handle, op, N, 0, dA, lda, strideA, dIpiv, strideP, nullptr, ldb, strideB, &info, batch_count), HIPBLAS_STATUS_SUCCESS); EXPECT_EQ(0, info); // can't make any assumptions about ptrs when batch_count < 0, this is handled by rocSOLVER return HIPBLAS_STATUS_SUCCESS; } template inline hipblasStatus_t testing_getrs_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasGetrsStridedBatchedFn = FORTRAN ? 
hipblasGetrsStridedBatched : hipblasGetrsStridedBatched; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride strideA = size_t(lda) * N * stride_scale; hipblasStride strideB = size_t(ldb) * 1 * stride_scale; hipblasStride strideP = size_t(N) * stride_scale; size_t A_size = strideA * batch_count; size_t B_size = strideB * batch_count; size_t Ipiv_size = strideP * batch_count; // Check to prevent memory allocation error if(N < 0 || lda < N || ldb < N || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hX(B_size); host_vector hB(B_size); host_vector hB1(B_size); host_vector hIpiv(Ipiv_size); host_vector hIpiv1(Ipiv_size); int info; device_vector dA(A_size); device_vector dB(B_size); device_vector dIpiv(Ipiv_size); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); hipblasOperation_t op = HIPBLAS_OP_N; EXPECT_HIPBLAS_STATUS( setup_getrs_strided_batched_testing( hA, hB, hX, hIpiv, dA, dB, dIpiv, N, lda, ldb, strideA, strideB, strideP, batch_count), HIPBLAS_STATUS_SUCCESS); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasGetrsStridedBatchedFn(handle, op, N, 1, dA, lda, strideA, dIpiv, strideP, dB, ldb, strideB, &info, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hB1.data(), dB, B_size * sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hIpiv1.data(), dIpiv, Ipiv_size * sizeof(int), hipMemcpyDeviceToHost)); /* ===================================================================== CPU LAPACK =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_getrs('N', N, 1, hA.data() + b * strideA, lda, hIpiv.data() + b * strideP, hB.data() + b * strideB, ldb); } hipblas_error = norm_check_general('F', N, 1, ldb, strideB, hB, hB1, batch_count); if(arg.unit_check) { U eps = std::numeric_limits::epsilon(); double tolerance = N * eps * 100; int zero = 0; unit_check_error(hipblas_error, tolerance); unit_check_general(1, 1, 1, &zero, &info); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasGetrsStridedBatchedFn(handle, op, N, 1, dA, lda, strideA, dIpiv, strideP, dB, ldb, strideB, &info, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasGetrsStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, getrs_gflop_count(N, 1), ArgumentLogging::NA_value, hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hbmv.hpp000066400000000000000000000164011434647641600222110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHbmvModel = ArgumentModel; inline void testname_hbmv(const Arguments& arg, std::string& name) { hipblasHbmvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hbmv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHbmvFn = FORTRAN ? hipblasHbmv : hipblasHbmv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int K = arg.K; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || K < 0 || lda <= K || !incx || !incy; if(invalid_size || !N) { hipblasStatus_t actual = hipblasHbmvFn( handle, uplo, N, K, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t A_size = size_t(lda) * N; size_t x_size = size_t(N) * abs_incx; size_t y_size = size_t(N) * abs_incy; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hy(y_size); host_vector hy_cpu(y_size); host_vector hy_host(y_size); host_vector hy_device(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix(hA, arg, K, N, lda, 0, 1, hipblas_client_alpha_sets_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHbmvFn( handle, uplo, N, K, (T*)&h_alpha, dA, lda, dx, incx, (T*)&h_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasHbmvFn(handle, uplo, N, K, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_hbmv( uplo, N, K, h_alpha, hA.data(), lda, hx.data(), incx, h_beta, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasHbmvFn(handle, uplo, N, K, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHbmvModel{}.log_args(std::cout, arg, gpu_time_used, hbmv_gflop_count(N, K), hbmv_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return 
HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hbmv_batched.hpp000066400000000000000000000235511434647641600236670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHbmvBatchedModel = ArgumentModel; inline void testname_hbmv_batched(const Arguments& arg, std::string& name) { hipblasHbmvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hbmv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHbmvBatchedFn = FORTRAN ? hipblasHbmvBatched : hipblasHbmvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int K = arg.K; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; size_t A_size = size_t(lda) * N; int abs_incy = incy >= 0 ? incy : -incy; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || K < 0 || lda <= K || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHbmvBatchedFn(handle, uplo, N, K, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_batch_vector hy_cpu(N, incy, batch_count); host_batch_vector hy_host(N, incy, batch_count); host_batch_vector hy_device(N, incy, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_beta_sets_nan); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHbmvBatchedFn(handle, uplo, N, K, (T*)&h_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, (T*)&h_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHbmvBatchedFn(handle, uplo, N, K, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hbmv(uplo, N, K, h_alpha, hA[b], lda, hx[b], incx, h_beta, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dy.transfer_from(hy)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHbmvBatchedFn(handle, uplo, N, K, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
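        // Timing summary: the loop above runs arg.cold_iters un-timed warm-up
        // launches, starts the stream-synchronized timer once iter reaches
        // arg.cold_iters, and times the remaining arg.iters launches, so the
        // subtraction above leaves the elapsed microseconds for the timed hbmv
        // batched calls only. log_args below combines that time with the hbmv
        // flop and byte models to report GFLOP/s, bandwidth, and the measured
        // host/device errors.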
hipblasHbmvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hbmv_gflop_count(N, K), hbmv_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hbmv_strided_batched.hpp000066400000000000000000000307231434647641600254040ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHbmvStridedBatchedModel = ArgumentModel; inline void testname_hbmv_strided_batched(const Arguments& arg, std::string& name) { hipblasHbmvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hbmv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHbmvStridedBatchedFn = FORTRAN ? hipblasHbmvStridedBatched : hipblasHbmvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int K = arg.K; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stride_A = size_t(lda) * N * stride_scale; hipblasStride stride_x = size_t(N) * abs_incx * stride_scale; hipblasStride stride_y = size_t(N) * abs_incy * stride_scale; size_t A_size = stride_A * batch_count; size_t X_size = stride_x * batch_count; size_t Y_size = stride_y * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || K < 0 || lda <= K || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHbmvStridedBatchedFn(handle, uplo, N, K, nullptr, nullptr, lda, stride_A, nullptr, incx, stride_x, nullptr, nullptr, incy, stride_y, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix( hA, arg, K, N, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, stride_y, batch_count, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hy_cpu = hy: save a copy in hy_cpu which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHbmvStridedBatchedFn(handle, uplo, N, K, (T*)&h_alpha, dA, lda, stride_A, dx, incx, stride_x, (T*)&h_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHbmvStridedBatchedFn(handle, uplo, N, K, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hbmv(uplo, N, K, h_alpha, hA.data() + b * stride_A, lda, hx.data() + b * stride_x, incx, h_beta, hy_cpu.data() + b * stride_y, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, stride_y, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, stride_y, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, N, abs_incy, stride_y, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, N, abs_incy, stride_y, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = 
get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHbmvStridedBatchedFn(handle, uplo, N, K, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHbmvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hbmv_gflop_count(N, K), hbmv_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hemm.hpp000066400000000000000000000155461434647641600222140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHemmModel = ArgumentModel; inline void testname_hemm(const Arguments& arg, std::string& name) { hipblasHemmModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hemm(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHemmFn = FORTRAN ? hipblasHemm : hipblasHemm; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); size_t rows = (side == HIPBLAS_SIDE_LEFT ? N : M); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || ldc < M || ldb < M || lda < K) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; size_t C_size = size_t(ldc) * N; host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix(hA, arg, rows, K, lda, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_matrix(hB, arg, M, N, ldb, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_matrix(hC_host, arg, M, N, ldc, 0, 1, hipblas_client_beta_sets_nan); hC_gold = hC_host; hC_device = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasHemmFn(handle, side, uplo, M, N, &h_alpha, dA, lda, dB, ldb, &h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasHemmFn(handle, side, uplo, M, N, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_hemm( side, uplo, M, N, h_alpha, hA.data(), lda, hB.data(), ldb, h_beta, hC_gold.data(), ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, ldc, hC_gold, hC_host); unit_check_general(M, N, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, hC_gold, hC_host); hipblas_error_device = norm_check_general('F', M, N, ldc, hC_gold, hC_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHemmFn( handle, side, uplo, M, N, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHemmModel{}.log_args(std::cout, arg, gpu_time_used, hemm_gflop_count(M, N, K), hemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
hipBLAS-rocm-5.5.1/clients/include/testing_hemm_batched.hpp000066400000000000000000000225651434647641600236650ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHemmBatchedModel = ArgumentModel; inline void testname_hemm_batched(const Arguments& arg, std::string& name) { hipblasHemmBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hemm_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHemmBatchedFn = FORTRAN ? hipblasHemmBatched : hipblasHemmBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; size_t rows = (side == HIPBLAS_SIDE_LEFT ? N : M); int K = (side == HIPBLAS_SIDE_LEFT ? 
M : N); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(M < 0 || N < 0 || lda < K || ldb < M || ldc < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; size_t C_size = size_t(ldc) * N; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // host arrays host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hB, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hC_host, arg, hipblas_client_beta_sets_nan); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHemmBatchedFn(handle, side, uplo, M, N, &h_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, &h_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHemmBatchedFn(handle, side, uplo, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hemm( side, uplo, M, N, h_alpha, hA[b], lda, hB[b], ldb, h_beta, hC_gold[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(M, N, batch_count, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldc, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter 
< runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHemmBatchedFn(handle, side, uplo, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHemmBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hemm_gflop_count(M, N, K), hemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hemm_strided_batched.hpp000066400000000000000000000266711434647641600254050ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHemmStridedBatchedModel = ArgumentModel; inline void testname_hemm_strided_batched(const Arguments& arg, std::string& name) { hipblasHemmStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hemm_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHemmStridedBatchedFn = FORTRAN ? hipblasHemmStridedBatched : hipblasHemmStridedBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; size_t rows = (side == HIPBLAS_SIDE_LEFT ? N : M); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(M < 0 || N < 0 || lda < K || ldb < M || ldc < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } hipblasStride stride_A = size_t(lda) * K * stride_scale; hipblasStride stride_B = size_t(ldb) * N * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = size_t(stride_A) * batch_count; size_t B_size = size_t(stride_B) * batch_count; size_t C_size = size_t(stride_C) * batch_count; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix( hA, arg, rows, K, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_matrix( hB, arg, M, N, ldb, stride_B, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_matrix( hC_host, arg, M, N, ldc, stride_C, batch_count, hipblas_client_beta_sets_nan); hC_gold = hC_host; hC_device = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHemmStridedBatchedFn(handle, side, uplo, M, N, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHemmStridedBatchedFn(handle, side, uplo, M, N, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hemm(side, uplo, M, N, h_alpha, hA.data() + b * stride_A, lda, hB.data() + b * stride_B, ldb, h_beta, hC_gold.data() + b * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, stride_C, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldc, stride_C, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHemmStridedBatchedFn(handle, side, uplo, M, N, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, 
ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHemmStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hemm_gflop_count(M, N, K), hemm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hemv.hpp000066400000000000000000000162301434647641600222140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHemvModel = ArgumentModel; inline void testname_hemv(const Arguments& arg, std::string& name) { hipblasHemvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hemv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHemvFn = FORTRAN ? hipblasHemv : hipblasHemv; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t A_size = size_t(lda) * N; size_t X_size = size_t(abs_incx) * N; size_t Y_size = size_t(abs_incy) * N; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || lda < N || lda < 1 || !incx || !incy; if(invalid_size || !N) { hipblasStatus_t actual = hipblasHemvFn( handle, uplo, N, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix(hA, arg, N, N, lda, 0, 1, hipblas_client_alpha_sets_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hy_cpu = hy: save a copy in hy_cpu which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasHemvFn(handle, uplo, N, (T*)&h_alpha, dA, lda, dx, incx, (T*)&h_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasHemvFn(handle, uplo, N, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_hemv( uplo, N, h_alpha, hA.data(), lda, hx.data(), incx, h_beta, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasHemvFn(handle, uplo, N, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHemvModel{}.log_args(std::cout, arg, gpu_time_used, hemv_gflop_count(N), hemv_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return 
HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hemv_batched.hpp000066400000000000000000000227451434647641600236760ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHemvBatchedModel = ArgumentModel; inline void testname_hemv_batched(const Arguments& arg, std::string& name) { hipblasHemvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hemv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHemvBatchedFn = FORTRAN ? hipblasHemvBatched : hipblasHemvBatched; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; int abs_incy = incy >= 0 ? incy : -incy; size_t A_size = size_t(lda) * N; int batch_count = arg.batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || lda < N || lda < 1 || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHemvBatchedFn(handle, uplo, N, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_batch_vector hy_host(N, incy, batch_count); host_batch_vector hy_device(N, incy, batch_count); host_batch_vector hy_cpu(N, incy, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_beta_sets_nan); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHemvBatchedFn(handle, uplo, N, (T*)&h_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, (T*)&h_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHemvBatchedFn(handle, uplo, N, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hemv(uplo, N, h_alpha, hA[b], lda, hx[b], incx, h_beta, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dy.transfer_from(hy)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHemvBatchedFn(handle, uplo, N, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
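        // Timing note: the loop above issues arg.cold_iters warm-up launches followed by
        // arg.iters timed launches; the clock is started only once iter reaches
        // arg.cold_iters and is read again here. get_time_us_sync(stream) synchronizes on
        // the handle's stream before sampling the timer, so gpu_time_used covers exactly
        // the hot iterations, including their queued GPU work. log_args below presumably
        // folds this elapsed time together with hemv_gflop_count(N) and hemv_gbyte_count(N)
        // into the reported GFLOP/s and GB/s figures.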
hipblasHemvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hemv_gflop_count(N), hemv_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hemv_strided_batched.hpp000066400000000000000000000271401434647641600254060ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHemvStridedBatchedModel = ArgumentModel; inline void testname_hemv_strided_batched(const Arguments& arg, std::string& name) { hipblasHemvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hemv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHemvStridedBatchedFn = FORTRAN ? hipblasHemvStridedBatched : hipblasHemvStridedBatched; int N = arg.N; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stride_A = size_t(lda) * N * stride_scale; hipblasStride stride_x = size_t(N) * abs_incx * stride_scale; hipblasStride stride_y = size_t(N) * abs_incy * stride_scale; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); size_t A_size = stride_A * batch_count; size_t X_size = stride_x * batch_count; size_t Y_size = stride_y * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || lda < N || lda < 1 || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHemvStridedBatchedFn(handle, uplo, N, nullptr, nullptr, lda, stride_A, nullptr, incx, stride_x, nullptr, nullptr, incy, stride_y, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix( hA, arg, N, N, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, stride_y, batch_count, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hy_cpu = hy: save a copy in hy_cpu which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHemvStridedBatchedFn(handle, uplo, N, (T*)&h_alpha, dA, lda, stride_A, dx, incx, stride_x, (T*)&h_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHemvStridedBatchedFn(handle, uplo, N, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hemv(uplo, N, h_alpha, hA.data() + b * stride_A, lda, hx.data() + b * stride_x, incx, h_beta, hy_cpu.data() + b * stride_y, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, stride_y, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, stride_y, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, N, abs_incy, stride_y, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, N, abs_incy, stride_y, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = 
get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHemvStridedBatchedFn(handle, uplo, N, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHemvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hemv_gflop_count(N), hemv_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her.hpp000066400000000000000000000156261434647641600220430ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerModel = ArgumentModel; inline void testname_her(const Arguments& arg, std::string& name) { hipblasHerModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_her(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerFn = FORTRAN ? hipblasHer : hipblasHer; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int lda = arg.lda; int abs_incx = incx >= 0 ? incx : -incx; size_t A_size = size_t(lda) * N; size_t x_size = size_t(N) * abs_incx; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || lda < N || lda < 1 || !incx; if(invalid_size || !N) { hipblasStatus_t actual = hipblasHerFn(handle, uplo, N, nullptr, nullptr, incx, nullptr, lda); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); device_vector dA(A_size); device_vector dx(x_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; U h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix(hA, arg, N, N, lda, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); // copy matrix is easy in STL; hA_cpu = hA: save a copy in hA_cpu which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHerFn(handle, uplo, N, (U*)&h_alpha, dx, incx, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * N * lda, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * N * lda, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHerFn(handle, uplo, N, d_alpha, dx, incx, dA, lda)); CHECK_HIP_ERROR( hipMemcpy(hA_device.data(), dA, sizeof(T) * N * lda, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_her(uplo, N, h_alpha, hx.data(), incx, hA_cpu.data(), lda); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, lda, hA_cpu.data(), hA_host.data()); // NOTE: on cuBLAS, with alpha == 0 and alpha on the device, there is not a quick-return, // instead, the imaginary part of the diagonal elements are set to 0. in rocBLAS, we are quick-returning // as well as in our reference code. For this reason, I've disabled the check here. 
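        // Hence the device-pointer comparison below runs only when h_alpha is nonzero;
        // the host-pointer comparison above stays unconditional, since the NOTE concerns
        // solely the device-side zero-alpha path.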
if(h_alpha) unit_check_general(N, N, lda, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, lda, hA_cpu, hA_host); hipblas_error_device = norm_check_general('F', N, N, lda, hA_cpu, hA_device); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * lda * N, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHerFn(handle, uplo, N, d_alpha, dx, incx, dA, lda)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHerModel{}.log_args(std::cout, arg, gpu_time_used, her_gflop_count(N), her_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her2.hpp000066400000000000000000000165201434647641600221170ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHer2Model = ArgumentModel; inline void testname_her2(const Arguments& arg, std::string& name) { hipblasHer2Model{}.test_name(arg, name); } template inline hipblasStatus_t testing_her2(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHer2Fn = FORTRAN ? hipblasHer2 : hipblasHer2; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; size_t A_size = size_t(lda) * N; size_t x_size = size_t(N) * abs_incx; size_t y_size = size_t(N) * abs_incy; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || lda < N || lda < 1; if(invalid_size || !N) { hipblasStatus_t actual = hipblasHer2Fn(handle, uplo, N, nullptr, nullptr, incx, nullptr, incy, nullptr, lda); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix(hA, arg, N, N, lda, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan); // copy matrix is easy in STL; hA_cpu = hA: save a copy in hA_cpu which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasHer2Fn(handle, uplo, N, (T*)&h_alpha, dx, incx, dy, incy, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHer2Fn(handle, uplo, N, d_alpha, dx, incx, dy, incy, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_her2(uplo, N, h_alpha, hx.data(), incx, hy.data(), incy, hA_cpu.data(), lda); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, lda, hA_cpu.data(), hA_host.data()); // NOTE: on cuBLAS, with alpha == 0 and alpha on the device, there is not a quick-return, // instead, the imaginary part of the diagonal elements are set to 0. in rocBLAS, we are quick-returning // as well as in our reference code. For this reason, I've disabled the check here. 
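        // Same guard as in testing_her.hpp: the device-pointer result is compared only for
        // a nonzero alpha, where cuBLAS and the quick-returning reference agree on the
        // imaginary parts of the diagonal elements.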
if(h_alpha != 0) unit_check_general(N, N, lda, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, lda, hA_cpu, hA_host); hipblas_error_device = norm_check_general('F', N, N, lda, hA_cpu, hA_device); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasHer2Fn(handle, uplo, N, d_alpha, dx, incx, dy, incy, dA, lda)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHer2Model{}.log_args(std::cout, arg, gpu_time_used, her2_gflop_count(N), her2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her2_batched.hpp000066400000000000000000000210451434647641600235670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHer2BatchedModel = ArgumentModel; inline void testname_her2_batched(const Arguments& arg, std::string& name) { hipblasHer2BatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_her2_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHer2BatchedFn = FORTRAN ? 
hipblasHer2Batched : hipblasHer2Batched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; int batch_count = arg.batch_count; size_t A_size = size_t(lda) * N; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || lda < N || lda < 1 || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHer2BatchedFn( handle, uplo, N, nullptr, nullptr, incx, nullptr, incy, nullptr, lda, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan); hA_cpu.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHer2BatchedFn(handle, uplo, N, (T*)&h_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHer2BatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_her2(uplo, N, h_alpha, hx[b], incx, hy[b], incy, hA_cpu[b], lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, lda, hA_cpu, hA_host); unit_check_general(N, N, batch_count, lda, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, lda, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, lda, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; 
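        // dA has just been restored from hA, so the timed iterations below start from the
        // same input as the correctness runs. The handle's stream is queried so that
        // get_time_us_sync() accounts for the work hipblasHer2BatchedFn queues on it; as in
        // the other batched tests in this client, only device-pointer mode is timed.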
CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHer2BatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHer2BatchedModel{}.log_args(std::cout, arg, gpu_time_used, her2_gflop_count(N), her2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her2_strided_batched.hpp000066400000000000000000000263731434647641600253160ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHer2StridedBatchedModel = ArgumentModel; inline void testname_her2_strided_batched(const Arguments& arg, std::string& name) { hipblasHer2StridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_her2_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHer2StridedBatchedFn = FORTRAN ? hipblasHer2StridedBatched : hipblasHer2StridedBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; hipblasStride stride_A = size_t(lda) * N * stride_scale; hipblasStride stride_x = size_t(N) * abs_incx * stride_scale; hipblasStride stride_y = size_t(N) * abs_incy * stride_scale; size_t A_size = stride_A * batch_count; size_t x_size = stride_x * batch_count; size_t y_size = stride_y * batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || lda < N || lda < 1 || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHer2StridedBatchedFn(handle, uplo, N, nullptr, nullptr, incx, stride_x, nullptr, incy, stride_y, nullptr, lda, stride_A, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix( hA, arg, N, N, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, stride_y, batch_count, hipblas_client_alpha_sets_nan); // copy matrix is easy in STL; hA_cpu = hA: save a copy in hA_cpu which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHer2StridedBatchedFn(handle, uplo, N, (T*)&h_alpha, dx, incx, stride_x, dy, incy, stride_y, dA, lda, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHer2StridedBatchedFn(handle, uplo, N, d_alpha, dx, incx, stride_x, dy, incy, stride_y, dA, lda, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_her2(uplo, N, h_alpha, hx.data() + b * stride_x, incx, hy.data() + b * stride_y, incy, hA_cpu.data() + b * stride_A, lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order 
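        // For reference, the HER2 update applied per batch instance above is
        //     A_b := A_b + alpha * x_b * y_b^H + conj(alpha) * y_b * x_b^H,
        // where A_b is Hermitian and only the triangle selected by uplo is
        // referenced. hA_host and hA_device hold the GPU results obtained with
        // host and device pointer mode for alpha, and both are compared against
        // the cblas_her2 reference accumulated in hA_cpu.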
if(arg.unit_check) { unit_check_general(N, N, batch_count, lda, stride_A, hA_cpu.data(), hA_host.data()); unit_check_general( N, N, batch_count, lda, stride_A, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', N, N, lda, stride_A, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', N, N, lda, stride_A, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHer2StridedBatchedFn(handle, uplo, N, d_alpha, dx, incx, stride_x, dy, incy, stride_y, dA, lda, stride_A, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHer2StridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, her2_gflop_count(N), her2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her2k.hpp000066400000000000000000000160661434647641600222770ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHer2kModel = ArgumentModel; inline void testname_her2k(const Arguments& arg, std::string& name) { hipblasHer2kModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_her2k(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHer2kFn = FORTRAN ? 
hipblasHer2k : hipblasHer2k; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K))) { return HIPBLAS_STATUS_INVALID_VALUE; } int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t B_size = size_t(ldb) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix(hA, arg, N, K1, lda, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix(hB, arg, N, K1, ldb, 0, 1, hipblas_client_never_set_nan, false, true); hipblas_init_matrix(hC_host, arg, N, N, ldc, 0, 1, hipblas_client_never_set_nan); // copy matrix is easy in STL; hB = hA: save a copy in hB which will be output of CPU BLAS hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHer2kFn( handle, uplo, transA, N, K, &h_alpha, dA, lda, dB, ldb, &h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasHer2kFn(handle, uplo, transA, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_her2k(uplo, transA, N, K, h_alpha, hA, lda, hB, ldb, h_beta, hC_gold, ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, ldc, hC_gold, hC_host); unit_check_general(N, N, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host); hipblas_error_device = norm_check_general('F', N, N, 
ldc, hC_gold, hC_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHer2kFn( handle, uplo, transA, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHer2kModel{}.log_args(std::cout, arg, gpu_time_used, her2k_gflop_count(N, K), her2k_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her2k_batched.hpp000066400000000000000000000234631434647641600237500ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHer2kBatchedModel = ArgumentModel; inline void testname_her2k_batched(const Arguments& arg, std::string& name) { hipblasHer2kBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_her2k_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHer2kBatchedFn = FORTRAN ? hipblasHer2kBatched : hipblasHer2kBatched; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); T h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K)) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); int K1 = (transA == HIPBLAS_OP_N ? 
K : N); size_t A_size = size_t(lda) * K1; size_t B_size = size_t(ldb) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hB, arg, hipblas_client_never_set_nan, false, true); hipblas_init_vector(hC_host, arg, hipblas_client_never_set_nan); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHer2kBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, &h_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHer2kBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_her2k( uplo, transA, N, K, h_alpha, hA[b], lda, hB[b], ldb, h_beta, hC_gold[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHer2kBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHer2kBatchedModel{}.log_args(std::cout, arg, gpu_time_used, her2k_gflop_count(N, K), 
her2k_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her2k_strided_batched.hpp000066400000000000000000000273401434647641600254640ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHer2kStridedBatchedModel = ArgumentModel; inline void testname_her2k_strided_batched(const Arguments& arg, std::string& name) { hipblasHer2kStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_her2k_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHer2kStridedBatchedFn = FORTRAN ? hipblasHer2kStridedBatched : hipblasHer2kStridedBatched; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int K1 = (transA == HIPBLAS_OP_N ? K : N); hipblasStride stride_A = size_t(lda) * K1 * stride_scale; hipblasStride stride_B = size_t(ldb) * K1 * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t B_size = stride_B * batch_count; size_t C_size = stride_C * batch_count; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K)) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix( hA, arg, N, K1, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hB, arg, N, K1, ldb, stride_B, batch_count, hipblas_client_never_set_nan, false, true); hipblas_init_matrix( hC_host, arg, N, N, ldc, stride_C, batch_count, hipblas_client_never_set_nan); hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHer2kStridedBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHer2kStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_her2k(uplo, transA, N, K, h_alpha, hA.data() + b * stride_A, lda, hB.data() + b * stride_B, ldb, h_beta, hC_gold.data() + b * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHer2kStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, 
d_beta, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHer2kStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, her2k_gflop_count(N, K), her2k_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her_batched.hpp000066400000000000000000000176231434647641600235140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerBatchedModel = ArgumentModel; inline void testname_her_batched(const Arguments& arg, std::string& name) { hipblasHerBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_her_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerBatchedFn = FORTRAN ? hipblasHerBatched : hipblasHerBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int lda = arg.lda; int batch_count = arg.batch_count; size_t A_size = size_t(lda) * N; double gpu_time_used, hipblas_error_host, hipblas_error_device; U h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || lda < N || lda < 1 || !incx || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHerBatchedFn( handle, uplo, N, nullptr, nullptr, incx, nullptr, lda, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hA_cpu.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHerBatchedFn(handle, uplo, N, (U*)&h_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHerBatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_her(uplo, N, h_alpha, hx[b], incx, hA_cpu[b], lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, lda, hA_cpu, hA_host); unit_check_general(N, N, batch_count, lda, hA_cpu, hA_host); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, lda, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, lda, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHerBatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), lda, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHerBatchedModel{}.log_args(std::cout, arg, gpu_time_used, her_gflop_count(N), her_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_her_strided_batched.hpp000066400000000000000000000174211434647641600252260ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerStridedBatchedModel = ArgumentModel; inline void testname_her_strided_batched(const Arguments& arg, std::string& name) { hipblasHerStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_her_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerStridedBatchedFn = FORTRAN ? hipblasHerStridedBatched : hipblasHerStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; hipblasStride stride_A = size_t(lda) * N * stride_scale; hipblasStride stride_x = size_t(N) * abs_incx * stride_scale; size_t A_size = stride_A * batch_count; size_t x_size = stride_x * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || lda < N || lda < 1 || !incx || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHerStridedBatchedFn( handle, uplo, N, nullptr, nullptr, incx, stride_x, nullptr, lda, stride_A, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); device_vector dA(A_size); device_vector dx(x_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; U h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix( hA, arg, N, N, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); // copy matrix is easy in STL; hA_cpu = hA: save a copy in hA_cpu which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHerStridedBatchedFn( handle, uplo, N, (U*)&h_alpha, dx, incx, stride_x, dA, lda, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHerStridedBatchedFn( handle, uplo, N, d_alpha, dx, incx, stride_x, dA, lda, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_her(uplo, N, h_alpha, hx.data() + b * stride_x, incx, hA_cpu.data() + b * stride_A, lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, lda, stride_A, hA_cpu.data(), hA_host.data()); unit_check_general( N, N, batch_count, lda, stride_A, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', N, N, lda, stride_A, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', N, N, lda, stride_A, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHerStridedBatchedFn( handle, uplo, N, d_alpha, dx, incx, stride_x, dA, lda, stride_A, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHerStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, her_gflop_count(N), her_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
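/* ---------------------------------------------------------------------------
 * Illustrative sketch (not part of the test suite): a minimal, standalone way
 * to drive the same strided-batched HER update through the public hipBLAS C
 * API. It assumes the hipblasZherStridedBatched entry point takes its
 * arguments in the same order as the hipblasHerStridedBatched<T> wrapper used
 * by the test above; the include paths may need adjusting for the local
 * install, error handling is kept minimal, and the buffers are only
 * zero-filled, so the call is valid but the result is left unverified.
 * ------------------------------------------------------------------------- */
#include <hip/hip_runtime_api.h>
#include <hipblas.h>
#include <cstdio>

int her_strided_batched_sketch()
{
    const int           N = 128, incx = 1, lda = 128, batch_count = 4;
    const double        alpha    = 1.0; // HER takes a real alpha, even for complex A
    const hipblasStride stride_x = hipblasStride(N) * incx;
    const hipblasStride stride_A = hipblasStride(lda) * N;

    hipblasHandle_t handle;
    if(hipblasCreate(&handle) != HIPBLAS_STATUS_SUCCESS)
        return -1;

    hipblasDoubleComplex* dx = nullptr;
    hipblasDoubleComplex* dA = nullptr;
    hipMalloc(reinterpret_cast<void**>(&dx),
              sizeof(hipblasDoubleComplex) * stride_x * batch_count);
    hipMalloc(reinterpret_cast<void**>(&dA),
              sizeof(hipblasDoubleComplex) * stride_A * batch_count);
    hipMemset(dx, 0, sizeof(hipblasDoubleComplex) * stride_x * batch_count);
    hipMemset(dA, 0, sizeof(hipblasDoubleComplex) * stride_A * batch_count);

    // A_b := A_b + alpha * x_b * x_b^H for b = 0, ..., batch_count - 1
    hipblasStatus_t status = hipblasZherStridedBatched(handle,
                                                       HIPBLAS_FILL_MODE_UPPER,
                                                       N,
                                                       &alpha, // host pointer (default pointer mode)
                                                       dx,
                                                       incx,
                                                       stride_x,
                                                       dA,
                                                       lda,
                                                       stride_A,
                                                       batch_count);
    if(status != HIPBLAS_STATUS_SUCCESS)
        std::printf("hipblasZherStridedBatched returned %d\n", int(status));

    hipFree(dx);
    hipFree(dA);
    hipblasDestroy(handle);
    return status == HIPBLAS_STATUS_SUCCESS ? 0 : -1;
}

/* The pointer-array batched variant exercised by testing_her_batched.hpp is
 * analogous, but takes device arrays of per-batch pointers instead of strides. */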
hipBLAS-rocm-5.5.1/clients/include/testing_herk.hpp000066400000000000000000000151111434647641600222030ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerkModel = ArgumentModel; inline void testname_herk(const Arguments& arg, std::string& name) { hipblasHerkModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_herk(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerkFn = FORTRAN ? hipblasHerk : hipblasHerk; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldc = arg.ldc; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && lda < N) || (transA != HIPBLAS_OP_N && lda < K)) { return HIPBLAS_STATUS_INVALID_VALUE; } int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); U h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix(hA, arg, N, K1, lda, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix(hC_host, arg, N, N, ldc, 0, 1, hipblas_client_beta_sets_nan, false, true); hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasHerkFn(handle, uplo, transA, N, K, &h_alpha, dA, lda, &h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasHerkFn(handle, uplo, transA, N, K, d_alpha, dA, lda, d_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_herk(uplo, transA, N, K, h_alpha, hA, lda, h_beta, hC_gold, ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, ldc, hC_gold, hC_host); unit_check_general(N, N, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasHerkFn(handle, uplo, transA, N, K, d_alpha, dA, lda, d_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHerkModel{}.log_args(std::cout, arg, gpu_time_used, herk_gflop_count(N, K), herk_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_herk_batched.hpp000066400000000000000000000211361434647641600236610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerkBatchedModel = ArgumentModel; inline void testname_herk_batched(const Arguments& arg, std::string& name) { hipblasHerkBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_herk_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerkBatchedFn = FORTRAN ? hipblasHerkBatched : hipblasHerkBatched; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldc = arg.ldc; int batch_count = arg.batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); U h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && lda < N) || (transA != HIPBLAS_OP_N && lda < K) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hC_host, arg, hipblas_client_beta_sets_nan, false, true); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHerkBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA.ptr_on_device(), lda, &h_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHerkBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_herk(uplo, transA, N, K, h_alpha, hA[b], lda, h_beta, hC_gold[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHerkBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, d_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHerkBatchedModel{}.log_args(std::cout, arg, gpu_time_used, herk_gflop_count(N, K), herk_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_herk_strided_batched.hpp000066400000000000000000000246601434647641600254040ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerkStridedBatchedModel = ArgumentModel; inline void testname_herk_strided_batched(const Arguments& arg, std::string& name) { hipblasHerkStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_herk_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerkStridedBatchedFn = FORTRAN ? hipblasHerkStridedBatched : hipblasHerkStridedBatched; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int K1 = (transA == HIPBLAS_OP_N ? K : N); hipblasStride stride_A = size_t(lda) * K1 * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t C_size = stride_C * batch_count; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && lda < N) || (transA != HIPBLAS_OP_N && lda < K) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); U h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix( hA, arg, N, K1, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hC_host, arg, N, N, ldc, stride_C, batch_count, hipblas_client_beta_sets_nan, false, true); hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHerkStridedBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA, lda, stride_A, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIPBLAS_ERROR(hipblasHerkStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_herk(uplo, transA, N, K, h_alpha, hA.data() + b * stride_A, lda, h_beta, hC_gold.data() + b * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHerkStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, d_beta, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHerkStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, herk_gflop_count(N, K), herk_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; 
} hipBLAS-rocm-5.5.1/clients/include/testing_herkx.hpp000066400000000000000000000155271434647641600224060ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerkxModel = ArgumentModel; inline void testname_herkx(const Arguments& arg, std::string& name) { hipblasHerkxModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_herkx(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerkxFn = FORTRAN ? hipblasHerkx : hipblasHerkx; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K))) { return HIPBLAS_STATUS_INVALID_VALUE; } int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t B_size = size_t(ldb) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU srand(1); hipblas_init(hA, N, K1, lda); hipblas_init(hB, N, K1, ldb); hipblas_init(hC_host, N, N, ldc); // hB = hA; hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHerkxFn( handle, uplo, transA, N, K, &h_alpha, dA, lda, dB, ldb, &h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasHerkxFn(handle, uplo, transA, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_herkx(uplo, transA, N, K, h_alpha, hA, lda, hB, ldb, h_beta, hC_gold, ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, ldc, hC_gold, hC_host); unit_check_general(N, N, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHerkxFn( handle, uplo, transA, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHerkxModel{}.log_args(std::cout, arg, gpu_time_used, herkx_gflop_count(N, K), herkx_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_herkx_batched.hpp000066400000000000000000000232471434647641600240550ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro
Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerkxBatchedModel = ArgumentModel; inline void testname_herkx_batched(const Arguments& arg, std::string& name) { hipblasHerkxBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_herkx_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerkxBatchedFn = FORTRAN ? hipblasHerkxBatched : hipblasHerkxBatched; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); T h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K)) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t B_size = size_t(ldb) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); hipblas_init(hA, true); hipblas_init(hB); hipblas_init(hC_host); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHerkxBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, &h_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHerkxBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_herkx( uplo, transA, N, K, h_alpha, hA[b], lda, hB[b], ldb, h_beta, hC_gold[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHerkxBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasHerkxBatchedModel{}.log_args(std::cout, arg, gpu_time_used, herkx_gflop_count(N, K), herkx_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_herkx_strided_batched.hpp000066400000000000000000000271121434647641600255670ustar00rootroot00000000000000/* 
************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHerkxStridedBatchedModel = ArgumentModel; inline void testname_herkx_strided_batched(const Arguments& arg, std::string& name) { hipblasHerkxStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_herkx_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHerkxStridedBatchedFn = FORTRAN ? hipblasHerkxStridedBatched : hipblasHerkxStridedBatched; int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int K1 = (transA == HIPBLAS_OP_N ? K : N); hipblasStride stride_A = size_t(lda) * K1 * stride_scale; hipblasStride stride_B = size_t(ldb) * K1 * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t B_size = stride_B * batch_count; size_t C_size = stride_C * batch_count; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K)) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); U h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU srand(1); hipblas_init(hA, N, K1, lda, stride_A, batch_count); hipblas_init(hB, N, K1, ldb, stride_B, batch_count); hipblas_init(hC_host, N, N, ldc, stride_C, batch_count); hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHerkxStridedBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHerkxStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_herkx(uplo, transA, N, K, h_alpha, hA.data() + b * stride_A, lda, hB.data() + b * stride_B, ldb, h_beta, hC_gold.data() + b * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHerkxStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds 
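// At this point gpu_time_used holds the wall-clock time, in microseconds, spent in the
// arg.iters timed launches above; log_args below pairs it with the herkx flop/byte model
// counts and the host/device-pointer-mode errors measured earlier when reporting results.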
hipblasHerkxStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, herkx_gflop_count(N, K), herkx_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hpmv.hpp000066400000000000000000000161721434647641600222340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHpmvModel = ArgumentModel; inline void testname_hpmv(const Arguments& arg, std::string& name) { hipblasHpmvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpmv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHpmvFn = FORTRAN ? hipblasHpmv : hipblasHpmv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t A_size = size_t(N) * (N + 1) / 2; size_t x_size = size_t(N) * abs_incx; size_t y_size = size_t(N) * abs_incy; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy; if(invalid_size || !N) { hipblasStatus_t actual = hipblasHpmvFn( handle, uplo, N, nullptr, nullptr, nullptr, incx, nullptr, nullptr, incy); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hy(y_size); host_vector hy_cpu(y_size); host_vector hy_host(y_size); host_vector hy_device(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix(hA, arg, A_size, 1, 1, 0, 1, hipblas_client_alpha_sets_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hy_cpu = hy: save a copy in hy_cpu which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasHpmvFn(handle, uplo, N, (T*)&h_alpha, dA, dx, incx, (T*)&h_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasHpmvFn(handle, uplo, N, d_alpha, dA, dx, incx, d_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_hpmv(uplo, N, h_alpha, hA.data(), hx.data(), incx, h_beta, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasHpmvFn(handle, uplo, N, d_alpha, dA, dx, incx, d_beta, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHpmvModel{}.log_args(std::cout, arg, gpu_time_used, hpmv_gflop_count(N), hpmv_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
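// -----------------------------------------------------------------------------
// Illustrative sketch (not part of the original hipBLAS client suite): a minimal,
// self-contained host-pointer-mode call of the public C API hipblasZhpmv, the same
// packed-Hermitian y = alpha*A*x + beta*y operation exercised by testing_hpmv above,
// but without the client-side helpers. The helper name, sizes, and scalar values are
// arbitrary example choices; the include paths assume the ROCm 5.x layout and may
// duplicate headers already pulled in by testing_common.hpp (include guards make
// that harmless). Error handling is reduced to simple returns.
// -----------------------------------------------------------------------------
#include <hip/hip_runtime.h>
#include <hipblas/hipblas.h>
#include <vector>

inline hipblasStatus_t example_zhpmv_host_pointer_mode(int n)
{
    const size_t packed_size = size_t(n) * (n + 1) / 2; // packed upper-triangular storage
    std::vector<hipblasDoubleComplex> hA(packed_size, hipblasDoubleComplex(1.0, 0.0));
    std::vector<hipblasDoubleComplex> hx(n, hipblasDoubleComplex(1.0, 0.0));
    std::vector<hipblasDoubleComplex> hy(n, hipblasDoubleComplex(0.0, 0.0));

    hipblasDoubleComplex *dA, *dx, *dy;
    if(hipMalloc(&dA, packed_size * sizeof(hipblasDoubleComplex)) != hipSuccess
       || hipMalloc(&dx, n * sizeof(hipblasDoubleComplex)) != hipSuccess
       || hipMalloc(&dy, n * sizeof(hipblasDoubleComplex)) != hipSuccess)
        return HIPBLAS_STATUS_ALLOC_FAILED;

    hipMemcpy(dA, hA.data(), packed_size * sizeof(hipblasDoubleComplex), hipMemcpyHostToDevice);
    hipMemcpy(dx, hx.data(), n * sizeof(hipblasDoubleComplex), hipMemcpyHostToDevice);
    hipMemcpy(dy, hy.data(), n * sizeof(hipblasDoubleComplex), hipMemcpyHostToDevice);

    hipblasHandle_t handle;
    hipblasCreate(&handle);
    hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST); // alpha/beta read from host memory

    const hipblasDoubleComplex alpha(1.0, 0.0), beta(0.0, 0.0);
    hipblasStatus_t status
        = hipblasZhpmv(handle, HIPBLAS_FILL_MODE_UPPER, n, &alpha, dA, dx, 1, &beta, dy, 1);

    // hipMemcpy on the null stream also synchronizes before the result is read back
    hipMemcpy(hy.data(), dy, n * sizeof(hipblasDoubleComplex), hipMemcpyDeviceToHost);

    hipblasDestroy(handle);
    hipFree(dA);
    hipFree(dx);
    hipFree(dy);
    return status;
}
// Example use (hypothetical): example_zhpmv_host_pointer_mode(64);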
hipBLAS-rocm-5.5.1/clients/include/testing_hpmv_batched.hpp000066400000000000000000000214701434647641600237030ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHpmvBatchedModel = ArgumentModel; inline void testname_hpmv_batched(const Arguments& arg, std::string& name) { hipblasHpmvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpmv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHpmvBatchedFn = FORTRAN ? hipblasHpmvBatched : hipblasHpmvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; int abs_incy = incy >= 0 ? incy : -incy; size_t A_size = size_t(N) * (N + 1) / 2; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHpmvBatchedFn( handle, uplo, N, nullptr, nullptr, nullptr, incx, nullptr, nullptr, incy, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_batch_vector hy_cpu(N, incy, batch_count); host_batch_vector hy_host(N, incy, batch_count); host_batch_vector hy_device(N, incy, batch_count); // arrays of pointers-to-device on host device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_beta_sets_nan); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHpmvBatchedFn(handle, uplo, N, (T*)&h_alpha, dA.ptr_on_device(), dx.ptr_on_device(), incx, (T*)&h_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHpmvBatchedFn(handle, uplo, N, d_alpha, dA.ptr_on_device(), dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hpmv(uplo, N, h_alpha, hA[b], hx[b], incx, h_beta, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dy.transfer_from(hy)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHpmvBatchedFn(handle, uplo, N, d_alpha, dA.ptr_on_device(), dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
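// Elapsed microseconds for the arg.iters timed launches; the warm-up (cold_iters)
// launches issued before the timer was started are deliberately excluded.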
hipblasHpmvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hpmv_gflop_count(N), hpmv_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hpmv_strided_batched.hpp000066400000000000000000000266141434647641600254260ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHpmvStridedBatchedModel = ArgumentModel; inline void testname_hpmv_strided_batched(const Arguments& arg, std::string& name) { hipblasHpmvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpmv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHpmvStridedBatchedFn = FORTRAN ? hipblasHpmvStridedBatched : hipblasHpmvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t dim_A = size_t(N) * (N + 1) / 2; hipblasStride stride_A = dim_A * stride_scale; hipblasStride stride_x = size_t(N) * abs_incx * stride_scale; hipblasStride stride_y = size_t(N) * abs_incy * stride_scale; size_t A_size = stride_A * batch_count; size_t X_size = stride_x * batch_count; size_t Y_size = stride_y * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHpmvStridedBatchedFn(handle, uplo, N, nullptr, nullptr, stride_A, nullptr, incx, stride_x, nullptr, nullptr, incy, stride_y, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Initial Data on CPU hipblas_init_matrix( hA, arg, dim_A, 1, 1, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, stride_y, batch_count, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hy_cpu = hy: save a copy in hy_cpu which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHpmvStridedBatchedFn(handle, uplo, N, (T*)&h_alpha, dA, stride_A, dx, incx, stride_x, (T*)&h_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHpmvStridedBatchedFn(handle, uplo, N, d_alpha, dA, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hpmv(uplo, N, h_alpha, hA.data() + b * stride_A, hx.data() + b * stride_x, incx, h_beta, hy_cpu.data() + b * stride_y, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incy, stride_y, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incy, stride_y, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, N, abs_incy, stride_y, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, N, abs_incy, stride_y, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); 
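// The timer is armed only after the cold (warm-up) iterations have been issued;
// get_time_us_sync synchronizes on the stream before reading the clock, so warm-up
// work does not leak into the measured interval.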
CHECK_HIPBLAS_ERROR(hipblasHpmvStridedBatchedFn(handle, uplo, N, d_alpha, dA, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHpmvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hpmv_gflop_count(N), hpmv_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hpr.hpp000066400000000000000000000147341434647641600220550ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHprModel = ArgumentModel; inline void testname_hpr(const Arguments& arg, std::string& name) { hipblasHprModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpr(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHprFn = FORTRAN ? hipblasHpr : hipblasHpr; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int abs_incx = incx >= 0 ? incx : -incx; size_t x_size = size_t(N) * abs_incx; size_t A_size = size_t(N) * (N + 1) / 2; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx; if(invalid_size || !N) { hipblasStatus_t actual = hipblasHprFn(handle, uplo, N, nullptr, nullptr, incx, nullptr); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); device_vector dA(A_size); device_vector dx(x_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; U h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix(hA, arg, A_size, 1, 1, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); // copy matrix is easy in STL; hA_cpu = hA: save a copy in hA_cpu which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHprFn(handle, uplo, N, (U*)&h_alpha, dx, incx, dA)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHprFn(handle, uplo, N, d_alpha, dx, incx, dA)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_hpr(uplo, N, h_alpha, hx.data(), incx, hA_cpu.data()); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_size, 1, hA_cpu.data(), hA_host.data()); unit_check_general(1, A_size, 1, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_host); hipblas_error_device = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_device); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHprFn(handle, uplo, N, d_alpha, dx, incx, dA)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHprModel{}.log_args(std::cout, arg, gpu_time_used, hpr_gflop_count(N), hpr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hpr2.hpp000066400000000000000000000157301434647641600221340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHpr2Model = ArgumentModel; inline void testname_hpr2(const Arguments& arg, std::string& name) { hipblasHpr2Model{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpr2(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHpr2Fn = FORTRAN ? hipblasHpr2 : hipblasHpr2; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t x_size = size_t(N) * abs_incx; size_t y_size = size_t(N) * abs_incy; size_t A_size = size_t(N) * (N + 1) / 2; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy; if(invalid_size || !N) { hipblasStatus_t actual = hipblasHpr2Fn(handle, uplo, N, nullptr, nullptr, incx, nullptr, incy, nullptr); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix(hA, arg, A_size, 1, 1, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan); // copy matrix is easy in STL; hA_cpu = hA: save a copy in hA_cpu which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHpr2Fn(handle, uplo, N, (T*)&h_alpha, dx, incx, dy, incy, dA)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHpr2Fn(handle, uplo, N, d_alpha, dx, incx, dy, incy, dA)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_hpr2(uplo, N, h_alpha, hx.data(), incx, hy.data(), incy, hA_cpu.data()); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_size, 1, hA_cpu.data(), hA_host.data()); unit_check_general(1, A_size, 1, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, A_size, 1, hA_cpu.data(), hA_host.data()); hipblas_error_device = norm_check_general('F', 1, A_size, 1, hA_cpu.data(), hA_device.data()); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHpr2Fn(handle, uplo, N, d_alpha, dx, incx, dy, incy, dA)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHpr2Model{}.log_args(std::cout, arg, gpu_time_used, hpr2_gflop_count(N), hpr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
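// -----------------------------------------------------------------------------
// Illustrative sketch (not part of the original hipBLAS client suite): the
// device-pointer-mode counterpart of the pattern verified by testing_hpr2 above,
// using the public C API hipblasZhpr2 with alpha resident in device memory. The
// helper name, sizes, and values are arbitrary example choices, and the include
// paths assume the ROCm 5.x layout.
// -----------------------------------------------------------------------------
#include <hip/hip_runtime.h>
#include <hipblas/hipblas.h>
#include <vector>

inline hipblasStatus_t example_zhpr2_device_pointer_mode(int n)
{
    const size_t packed_size = size_t(n) * (n + 1) / 2; // packed lower-triangular storage
    std::vector<hipblasDoubleComplex> hA(packed_size, hipblasDoubleComplex(0.0, 0.0));
    std::vector<hipblasDoubleComplex> hx(n, hipblasDoubleComplex(1.0, 0.0));
    std::vector<hipblasDoubleComplex> hy(n, hipblasDoubleComplex(0.0, 1.0));
    const hipblasDoubleComplex        h_alpha(2.0, 0.0);

    hipblasDoubleComplex *dA, *dx, *dy, *d_alpha;
    if(hipMalloc(&dA, packed_size * sizeof(hipblasDoubleComplex)) != hipSuccess
       || hipMalloc(&dx, n * sizeof(hipblasDoubleComplex)) != hipSuccess
       || hipMalloc(&dy, n * sizeof(hipblasDoubleComplex)) != hipSuccess
       || hipMalloc(&d_alpha, sizeof(hipblasDoubleComplex)) != hipSuccess)
        return HIPBLAS_STATUS_ALLOC_FAILED;

    hipMemcpy(dA, hA.data(), packed_size * sizeof(hipblasDoubleComplex), hipMemcpyHostToDevice);
    hipMemcpy(dx, hx.data(), n * sizeof(hipblasDoubleComplex), hipMemcpyHostToDevice);
    hipMemcpy(dy, hy.data(), n * sizeof(hipblasDoubleComplex), hipMemcpyHostToDevice);
    hipMemcpy(d_alpha, &h_alpha, sizeof(hipblasDoubleComplex), hipMemcpyHostToDevice);

    hipblasHandle_t handle;
    hipblasCreate(&handle);
    // In device pointer mode the scalar is dereferenced on the GPU, so no host copy
    // of alpha is needed at call time and the launch stays asynchronous.
    hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE);

    hipblasStatus_t status
        = hipblasZhpr2(handle, HIPBLAS_FILL_MODE_LOWER, n, d_alpha, dx, 1, dy, 1, dA);

    hipMemcpy(hA.data(), dA, packed_size * sizeof(hipblasDoubleComplex), hipMemcpyDeviceToHost);

    hipblasDestroy(handle);
    hipFree(dA);
    hipFree(dx);
    hipFree(dy);
    hipFree(d_alpha);
    return status;
}
// Example use (hypothetical): example_zhpr2_device_pointer_mode(128);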
hipBLAS-rocm-5.5.1/clients/include/testing_hpr2_batched.hpp000066400000000000000000000205151434647641600236030ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHpr2BatchedModel = ArgumentModel; inline void testname_hpr2_batched(const Arguments& arg, std::string& name) { hipblasHpr2BatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpr2_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHpr2BatchedFn = FORTRAN ? hipblasHpr2Batched : hipblasHpr2Batched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; size_t A_size = size_t(N) * (N + 1) / 2; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHpr2BatchedFn( handle, uplo, N, nullptr, nullptr, incx, nullptr, incy, nullptr, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan); hA_cpu.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHpr2BatchedFn(handle, uplo, N, (T*)&h_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHpr2BatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hpr2(uplo, N, h_alpha, hx[b], incx, hy[b], incy, hA_cpu[b]); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_size, batch_count, 1, hA_cpu, hA_host); unit_check_general(1, A_size, batch_count, 1, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHpr2BatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHpr2BatchedModel{}.log_args(std::cout, arg, gpu_time_used, hpr2_gflop_count(N), hpr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hpr2_strided_batched.hpp000066400000000000000000000260051434647641600253210ustar00rootroot00000000000000/* 
************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHpr2StridedBatchedModel = ArgumentModel; inline void testname_hpr2_strided_batched(const Arguments& arg, std::string& name) { hipblasHpr2StridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpr2_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasHpr2StridedBatchedFn = FORTRAN ? hipblasHpr2StridedBatched : hipblasHpr2StridedBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t dim_A = size_t(N) * (N + 1) / 2; hipblasStride stride_A = dim_A * stride_scale; hipblasStride stride_x = size_t(N) * abs_incx * stride_scale; hipblasStride stride_y = size_t(N) * abs_incy * stride_scale; size_t A_size = stride_A * batch_count; size_t x_size = stride_x * batch_count; size_t y_size = stride_y * batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHpr2StridedBatchedFn(handle, uplo, N, nullptr, nullptr, incx, stride_x, nullptr, incy, stride_y, nullptr, stride_A, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix( hA, arg, dim_A, 1, 1, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, stride_y, batch_count, hipblas_client_alpha_sets_nan); // copy matrix is easy in STL; hA_cpu = hA: save a copy in hA_cpu which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHpr2StridedBatchedFn(handle, uplo, N, (T*)&h_alpha, dx, incx, stride_x, dy, incy, stride_y, dA, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHpr2StridedBatchedFn(handle, uplo, N, d_alpha, dx, incx, stride_x, dy, incy, stride_y, dA, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hpr2(uplo, N, h_alpha, hx.data() + b * stride_x, incx, hy.data() + b * stride_y, incy, hA_cpu.data() + b * stride_A); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general( 1, dim_A, batch_count, 1, stride_A, hA_cpu.data(), hA_host.data()); unit_check_general( 1, dim_A, batch_count, 1, stride_A, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, dim_A, 1, stride_A, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', 1, dim_A, 1, stride_A, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHpr2StridedBatchedFn(handle, uplo, N, d_alpha, dx, incx, stride_x, dy, incy, stride_y, 
dA, stride_A, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHpr2StridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hpr2_gflop_count(N), hpr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hpr_batched.hpp000066400000000000000000000165561434647641600235330ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHprBatchedModel = ArgumentModel; inline void testname_hpr_batched(const Arguments& arg, std::string& name) { hipblasHprBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpr_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHprBatchedFn = FORTRAN ? hipblasHprBatched : hipblasHprBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; size_t A_size = size_t(N) * (N + 1) / 2; double gpu_time_used, hipblas_error_host, hipblas_error_device; U h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHprBatchedFn(handle, uplo, N, nullptr, nullptr, incx, nullptr, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
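// hpr performs the rank-1 Hermitian update A := alpha*x*x**H + A on a packed matrix;
// note that alpha is real for hpr (U = real_t<T> above), unlike hpr2 where alpha is complex.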
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hA_cpu.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHprBatchedFn(handle, uplo, N, (U*)&h_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHprBatchedFn( handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hpr(uplo, N, h_alpha, hx[b], incx, hA_cpu[b]); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_size, batch_count, 1, hA_cpu, hA_host); unit_check_general(1, A_size, batch_count, 1, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHprBatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHprBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hpr_gflop_count(N), hpr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_hpr_strided_batched.hpp000066400000000000000000000171701434647641600252420ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasHprStridedBatchedModel = ArgumentModel; inline void testname_hpr_strided_batched(const Arguments& arg, std::string& name) { hipblasHprStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_hpr_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasHprStridedBatchedFn = FORTRAN ? hipblasHprStridedBatched : hipblasHprStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; size_t dim_A = size_t(N) * (N + 1) / 2; hipblasStride stride_A = dim_A * stride_scale; hipblasStride stride_x = size_t(N) * abs_incx * stride_scale; size_t A_size = stride_A * batch_count; size_t x_size = stride_x * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasHprStridedBatchedFn( handle, uplo, N, nullptr, nullptr, incx, stride_x, nullptr, stride_A, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
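// Strided-batched packed layout: consecutive matrices are stride_A = (N*(N+1)/2)*stride_scale
// elements apart and consecutive vectors are stride_x = N*|incx|*stride_scale elements apart,
// so A_size and x_size above are simply stride * batch_count.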
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); device_vector dA(A_size); device_vector dx(x_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; U h_alpha = arg.get_alpha(); // Initial Data on CPU hipblas_init_matrix( hA, arg, dim_A, 1, 1, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, false, true); // copy matrix is easy in STL; hA_cpu = hA: save a copy in hA_cpu which will be output of CPU BLAS hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(U), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasHprStridedBatchedFn( handle, uplo, N, (U*)&h_alpha, dx, incx, stride_x, dA, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasHprStridedBatchedFn( handle, uplo, N, d_alpha, dx, incx, stride_x, dA, stride_A, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_hpr( uplo, N, h_alpha, hx.data() + b * stride_x, incx, hA_cpu.data() + b * stride_A); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general( 1, dim_A, batch_count, 1, stride_A, hA_cpu.data(), hA_host.data()); unit_check_general( 1, dim_A, batch_count, 1, stride_A, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, dim_A, 1, stride_A, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', 1, dim_A, 1, stride_A, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasHprStridedBatchedFn( handle, uplo, N, d_alpha, dx, incx, stride_x, dA, stride_A, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasHprStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, hpr_gflop_count(N), hpr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
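/* ---------------------------------------------------------------------------
 * Illustrative sketch (not part of the original hipBLAS sources): every test
 * in these headers follows the same verification pattern -- run the library
 * call once with HIPBLAS_POINTER_MODE_HOST and once with
 * HIPBLAS_POINTER_MODE_DEVICE, then compare both outputs against a cblas
 * reference. The helper below only documents that control flow; the name
 * check_both_pointer_modes and the Launch callback are hypothetical, while
 * hipblasSetPointerMode and the status codes are documented hipBLAS API
 * (hipblas.h is already included via testing_common.hpp above).
 * ------------------------------------------------------------------------- */
template <typename Launch>
inline hipblasStatus_t check_both_pointer_modes(hipblasHandle_t handle, Launch launch)
{
    // Pass 1: scalars such as alpha are read from host memory.
    hipblasStatus_t status = hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST);
    if(status != HIPBLAS_STATUS_SUCCESS)
        return status;
    if((status = launch()) != HIPBLAS_STATUS_SUCCESS)
        return status;

    // Pass 2: the same call with scalars resident in device memory. The caller
    // restores the modified inputs between passes and checks both results
    // (unit_check_general / norm_check_general) afterwards.
    if((status = hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE))
       != HIPBLAS_STATUS_SUCCESS)
        return status;
    return launch();
}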
hipBLAS-rocm-5.5.1/clients/include/testing_iamax_iamin.hpp000066400000000000000000000151511434647641600235320ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" using hipblasIamaxIaminModel = ArgumentModel; template using hipblas_iamax_iamin_t = hipblasStatus_t (*)(hipblasHandle_t handle, int n, const T* x, int incx, int* result); template inline hipblasStatus_t testing_iamax_iamin(const Arguments& arg, hipblas_iamax_iamin_t func) { int N = arg.N; int incx = arg.incx; hipblasLocalHandle handle(arg); int zero = 0; // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0) { device_vector d_hipblas_result_0(1); host_vector h_hipblas_result_0(1); hipblas_init_nan(h_hipblas_result_0.data(), 1); CHECK_HIP_ERROR( hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(int), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(func(handle, N, nullptr, incx, d_hipblas_result_0)); host_vector cpu_0(1); host_vector gpu_0(1); CHECK_HIP_ERROR(hipMemcpy(gpu_0, d_hipblas_result_0, sizeof(int), hipMemcpyDeviceToHost)); unit_check_general(1, 1, 1, cpu_0, gpu_0); return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = size_t(N) * incx; // Naming: dX is in GPU (device) memory. 
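// iamax/iamin return an int index using the 1-based Fortran/BLAS convention, while the
// cblas reference used below is 0-based; that is why the test adds 1 to cpu_result before
// comparing. For example, with x = {1, 5, 3}, hipblasIsamax reports 2 and cblas_isamax returns 1.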
hK is in CPU (host) memory, plz follow this // practice host_vector hx(sizeX); int cpu_result, hipblas_result_host, hipblas_result_device; device_vector dx(sizeX); device_vector d_hipblas_result(1); // Initial Data on CPU hipblas_init_vector(hx, arg, N, incx, 0, 1, hipblas_client_alpha_sets_nan, true); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * N * incx, hipMemcpyHostToDevice)); double gpu_time_used; int hipblas_error_host, hipblas_error_device; if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ // device_pointer CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(func(handle, N, dx, incx, d_hipblas_result)); CHECK_HIP_ERROR(hipMemcpy( &hipblas_result_device, d_hipblas_result, sizeof(int), hipMemcpyDeviceToHost)); // host_pointer CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(func(handle, N, dx, incx, &hipblas_result_host)); /* ===================================================================== CPU BLAS =================================================================== */ REFBLAS_FUNC(N, hx.data(), incx, &cpu_result); // change to Fortran 1 based indexing as in BLAS standard, not cblas zero based indexing cpu_result += 1; if(arg.unit_check) { unit_check_general(1, 1, 1, &cpu_result, &hipblas_result_host); unit_check_general(1, 1, 1, &cpu_result, &hipblas_result_device); } if(arg.norm_check) { hipblas_error_host = std::abs(hipblas_result_host - cpu_result); hipblas_error_device = std::abs(hipblas_result_device - cpu_result); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(func(handle, N, dx, incx, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasIamaxIaminModel{}.log_args(std::cout, arg, gpu_time_used, iamax_gflop_count(N), iamax_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline void testname_amax(const Arguments& arg, std::string& name) { hipblasIamaxIaminModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_amax(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasIamaxFn = FORTRAN ? hipblasIamax : hipblasIamax; return testing_iamax_iamin>(arg, hipblasIamaxFn); } inline void testname_amin(const Arguments& arg, std::string& name) { hipblasIamaxIaminModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_amin(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasIaminFn = FORTRAN ? hipblasIamin : hipblasIamin; return testing_iamax_iamin>(arg, hipblasIamin); } hipBLAS-rocm-5.5.1/clients/include/testing_iamax_iamin_batched.hpp000066400000000000000000000176011434647641600252060ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" using hipblasIamaxIaminBatchedModel = ArgumentModel; template using hipblas_iamax_iamin_batched_t = hipblasStatus_t (*)( hipblasHandle_t handle, int n, const T* const x[], int incx, int batch_count, int* result); template inline hipblasStatus_t testing_iamax_iamin_batched(const Arguments& arg, hipblas_iamax_iamin_batched_t func) { int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; hipblasLocalHandle handle(arg); int zero = 0; // check to prevent undefined memory allocation error if(batch_count <= 0 || N <= 0 || incx <= 0) { // quick return success device_vector d_hipblas_result_0(std::max(1, batch_count)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(int) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(func(handle, N, nullptr, incx, batch_count, d_hipblas_result_0)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(int) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } host_batch_vector hx(N, incx, batch_count); host_vector cpu_result(batch_count); host_vector hipblas_result_host(batch_count); host_vector hipblas_result_device(batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_hipblas_result_device(batch_count); CHECK_HIP_ERROR(dx.memcheck()); // Initial Data on CPU hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); CHECK_HIP_ERROR(dx.transfer_from(hx)); double gpu_time_used; int hipblas_error_host = 0, hipblas_error_device = 0; if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ // device_pointer CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( func(handle, N, dx.ptr_on_device(), incx, batch_count, d_hipblas_result_device)); CHECK_HIP_ERROR(hipMemcpy(hipblas_result_device, d_hipblas_result_device, sizeof(int) * batch_count, 
hipMemcpyDeviceToHost)); // host_pointer CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( func(handle, N, dx.ptr_on_device(), incx, batch_count, hipblas_result_host)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { REFBLAS_FUNC(N, hx[b], incx, cpu_result + b); // change to Fortran 1 based indexing as in BLAS standard, not cblas zero based indexing cpu_result[b] += 1; } if(arg.unit_check) { unit_check_general(1, 1, batch_count, cpu_result, hipblas_result_host); unit_check_general(1, 1, batch_count, cpu_result, hipblas_result_device); } if(arg.norm_check) { for(int b = 0; b < batch_count; b++) { hipblas_error_host = std::max(hipblas_error_host, std::abs(hipblas_result_host[b] - cpu_result[b])); hipblas_error_device = std::max(hipblas_error_device, std::abs(hipblas_result_device[b] - cpu_result[b])); } } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( func(handle, N, dx.ptr_on_device(), incx, batch_count, d_hipblas_result_device)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasIamaxIaminBatchedModel{}.log_args(std::cout, arg, gpu_time_used, iamax_gflop_count(N), iamax_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline void testname_amax_batched(const Arguments& arg, std::string& name) { hipblasIamaxIaminBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_amax_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasIamaxBatchedFn = FORTRAN ? hipblasIamaxBatched : hipblasIamaxBatched; return testing_iamax_iamin_batched>(arg, hipblasIamaxBatchedFn); } inline void testname_amin_batched(const Arguments& arg, std::string& name) { hipblasIamaxIaminBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_amin_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasIaminBatchedFn = FORTRAN ? hipblasIaminBatched : hipblasIaminBatched; return testing_iamax_iamin_batched>(arg, hipblasIaminBatchedFn); } hipBLAS-rocm-5.5.1/clients/include/testing_iamax_iamin_strided_batched.hpp000066400000000000000000000220021434647641600267130ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" using hipblasIamaxIaminStridedBatchedModel = ArgumentModel; template using hipblas_iamax_iamin_strided_batched_t = hipblasStatus_t (*)(hipblasHandle_t handle, int n, const T* x, int incx, hipblasStride stridex, int batch_count, int* result); template inline hipblasStatus_t testing_iamax_iamin_strided_batched(const Arguments& arg, hipblas_iamax_iamin_strided_batched_t func) { int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride stridex = size_t(N) * incx * stride_scale; size_t sizeX = stridex * batch_count; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(batch_count <= 0 || N <= 0 || incx <= 0) { // quick return success or invalid value device_vector d_hipblas_result_0(std::max(1, batch_count)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(int) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( func(handle, N, nullptr, incx, stridex, batch_count, d_hipblas_result_0)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(int) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
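// Strided-batched layout: element i of batch b lives at x[b*stridex + i*incx] with
// stridex = N*incx*stride_scale (computed above), and the index result for batch b
// is written to result[b].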
hK is in CPU (host) memory, plz follow this // practice host_vector hx(sizeX); device_vector dx(sizeX); host_vector cpu_result(batch_count); host_vector hipblas_result_host(batch_count); host_vector hipblas_result_device(batch_count); device_vector d_hipblas_result(batch_count); // Initial Data on CPU hipblas_init_vector( hx, arg, N, incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); double gpu_time_used; int hipblas_error_host = 0, hipblas_error_device = 0; if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ // device_pointer CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(func(handle, N, dx, incx, stridex, batch_count, d_hipblas_result)); CHECK_HIP_ERROR(hipMemcpy(hipblas_result_device, d_hipblas_result, sizeof(int) * batch_count, hipMemcpyDeviceToHost)); // host_pointer CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(func(handle, N, dx, incx, stridex, batch_count, hipblas_result_host)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { REFBLAS_FUNC(N, hx.data() + b * stridex, incx, &(cpu_result[b])); // change to Fortran 1 based indexing as in BLAS standard, not cblas zero based indexing cpu_result[b] += 1; } if(arg.unit_check) { unit_check_general( 1, 1, batch_count, cpu_result.data(), hipblas_result_host.data()); unit_check_general( 1, 1, batch_count, cpu_result.data(), hipblas_result_device.data()); } if(arg.norm_check) { for(int b = 0; b < batch_count; b++) { hipblas_error_host = std::max(hipblas_error_host, std::abs(hipblas_result_host[b] - cpu_result[b])); hipblas_error_device = std::max(hipblas_error_device, std::abs(hipblas_result_device[b] - cpu_result[b])); } } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(func(handle, N, dx, incx, stridex, batch_count, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasIamaxIaminStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, iamax_gflop_count(N), iamax_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline void testname_amax_strided_batched(const Arguments& arg, std::string& name) { hipblasIamaxIaminStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_amax_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasIamaxStridedBatchedFn = FORTRAN ? 
hipblasIamaxStridedBatched : hipblasIamaxStridedBatched; return testing_iamax_iamin_strided_batched>(arg, hipblasIamaxStridedBatchedFn); } inline void testname_amin_strided_batched(const Arguments& arg, std::string& name) { hipblasIamaxIaminStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_amin_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasIaminStridedBatchedFn = FORTRAN ? hipblasIaminStridedBatched : hipblasIaminStridedBatched; return testing_iamax_iamin_strided_batched>(arg, hipblasIaminStridedBatchedFn); } hipBLAS-rocm-5.5.1/clients/include/testing_nrm2.hpp000066400000000000000000000132361434647641600221360ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasNrm2Model = ArgumentModel; inline void testname_nrm2(const Arguments& arg, std::string& name) { hipblasNrm2Model{}.test_name(arg, name); } template inline hipblasStatus_t testing_nrm2(const Arguments& arg) { using Tr = real_t; bool FORTRAN = arg.fortran; auto hipblasNrm2Fn = FORTRAN ? hipblasNrm2 : hipblasNrm2; int N = arg.N; int incx = arg.incx; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0) { device_vector d_hipblas_result_0(1); host_vector h_hipblas_result_0(1); hipblas_init_nan(h_hipblas_result_0.data(), 1); CHECK_HIP_ERROR( hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2Fn(handle, N, nullptr, incx, d_hipblas_result_0)); host_vector cpu_0(1); host_vector gpu_0(1); CHECK_HIP_ERROR(hipMemcpy(gpu_0, d_hipblas_result_0, sizeof(Tr), hipMemcpyDeviceToHost)); unit_check_general(1, 1, 1, cpu_0, gpu_0); return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = size_t(N) * incx; // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); device_vector dx(sizeX); device_vector d_hipblas_result(1); Tr cpu_result, hipblas_result_host, hipblas_result_device; double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx, arg, N, incx, 0, 1, hipblas_client_alpha_sets_nan, true); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * N * incx, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { // hipblasNrm2 accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2Fn(handle, N, dx, incx, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasNrm2Fn(handle, N, dx, incx, &hipblas_result_host)); CHECK_HIP_ERROR( hipMemcpy(&hipblas_result_device, d_hipblas_result, sizeof(Tr), hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_nrm2(N, hx.data(), incx, &cpu_result); if(arg.unit_check) { unit_check_nrm2(cpu_result, hipblas_result_host, N); unit_check_nrm2(cpu_result, hipblas_result_device, N); } if(arg.norm_check) { hipblas_error_host = vector_norm_1(1, 1, &cpu_result, &hipblas_result_host); hipblas_error_device = vector_norm_1(1, 1, &cpu_result, &hipblas_result_device); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasNrm2Fn(handle, N, dx, incx, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasNrm2Model{}.log_args(std::cout, arg, gpu_time_used, nrm2_gflop_count(N), nrm2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_nrm2_batched.hpp000066400000000000000000000155561434647641600236170ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasNrm2BatchedModel = ArgumentModel; inline void testname_nrm2_batched(const Arguments& arg, std::string& name) { hipblasNrm2BatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_nrm2_batched(const Arguments& arg) { using Tr = real_t; bool FORTRAN = arg.fortran; auto hipblasNrm2BatchedFn = FORTRAN ? hipblasNrm2Batched : hipblasNrm2Batched; int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(1, batch_count)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasNrm2BatchedFn(handle, N, nullptr, incx, batch_count, d_hipblas_result_0)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = size_t(N) * incx; double gpu_time_used; double hipblas_error_host = 0, hipblas_error_device = 0; // Naming: dX is in GPU (device) memory. 
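// nrm2 produces a real-valued result even for complex inputs (Tr = real_t<T> above), one
// value per batch. Results are compared with unit_check_nrm2 (which takes N, presumably to
// scale its tolerance) rather than bit-exactly, since the GPU may accumulate the reduction
// in a different order than the CPU reference.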
hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_vector h_cpu_result(batch_count); host_vector h_hipblas_result_host(batch_count); host_vector h_hipblas_result_device(batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_hipblas_result(batch_count); CHECK_HIP_ERROR(dx.memcheck()); // Initial Data on CPU hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); CHECK_HIP_ERROR(dx.transfer_from(hx)); if(arg.unit_check || arg.norm_check) { // hipblasNrm2 accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2BatchedFn( handle, N, dx.ptr_on_device(), incx, batch_count, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasNrm2BatchedFn( handle, N, dx.ptr_on_device(), incx, batch_count, h_hipblas_result_host)); CHECK_HIP_ERROR(hipMemcpy(h_hipblas_result_device, d_hipblas_result, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_nrm2(N, hx[b], incx, &(h_cpu_result[b])); } if(arg.unit_check) { unit_check_nrm2(batch_count, h_cpu_result, h_hipblas_result_host, N); unit_check_nrm2(batch_count, h_cpu_result, h_hipblas_result_device, N); } if(arg.norm_check) { for(int b = 0; b < batch_count; b++) { hipblas_error_host = std::max(vector_norm_1(1, 1, &(h_cpu_result[b]), &(h_hipblas_result_host[b])), hipblas_error_host); hipblas_error_device = std::max( vector_norm_1(1, 1, &(h_cpu_result[b]), &(h_hipblas_result_device[b])), hipblas_error_device); } } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasNrm2BatchedFn( handle, N, dx.ptr_on_device(), incx, batch_count, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasNrm2BatchedModel{}.log_args(std::cout, arg, gpu_time_used, nrm2_gflop_count(N), nrm2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_nrm2_batched_ex.hpp000066400000000000000000000241631434647641600243050ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasNrm2BatchedExModel = ArgumentModel; inline void testname_nrm2_batched_ex(const Arguments& arg, std::string& name) { hipblasNrm2BatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_nrm2_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasNrm2BatchedExFn = FORTRAN ? hipblasNrm2BatchedExFortran : hipblasNrm2BatchedEx; int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t resultType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(batch_count, 1)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2BatchedExFn(handle, N, nullptr, xType, incx, batch_count, d_hipblas_result_0, resultType, executionType)); if(batch_count > 0) { // TODO: error in rocBLAS - only setting the first element to 0, not for all batches // host_vector cpu_0(batch_count); // host_vector gpu_0(batch_count); // CHECK_HIP_ERROR(hipMemcpy( // gpu_0, d_hipblas_result_0, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); // unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
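// The _ex variant is type-dispatched: xType, resultType and executionType come from
// arg.a_type / arg.b_type / arg.compute_type, and the testing_nrm2_batched_ex() dispatcher
// at the end of this file maps the supported hipblasDatatype_t combinations (e.g.
// HIPBLAS_R_16F input with HIPBLAS_R_32F execution) to the matching template instantiation;
// any other combination returns HIPBLAS_STATUS_NOT_SUPPORTED.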
hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_vector h_cpu_result(batch_count); host_vector h_hipblas_result_host(batch_count); host_vector h_hipblas_result_device(batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_hipblas_result(batch_count); CHECK_HIP_ERROR(dx.memcheck()); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); CHECK_HIP_ERROR(dx.transfer_from(hx)); if(arg.unit_check || arg.norm_check) { // hipblasNrm2 accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2BatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, batch_count, d_hipblas_result, resultType, executionType)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasNrm2BatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, batch_count, h_hipblas_result_host, resultType, executionType)); CHECK_HIP_ERROR(hipMemcpy(h_hipblas_result_device, d_hipblas_result, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_nrm2(N, hx[b], incx, &(h_cpu_result[b])); } if(arg.unit_check) { unit_check_nrm2(batch_count, h_cpu_result, h_hipblas_result_host, N); unit_check_nrm2(batch_count, h_cpu_result, h_hipblas_result_device, N); } if(arg.norm_check) { for(int b = 0; b < batch_count; b++) { hipblas_error_host = std::max(vector_norm_1(1, 1, &(h_cpu_result[b]), &(h_hipblas_result_host[b])), hipblas_error_host); hipblas_error_device = std::max( vector_norm_1(1, 1, &(h_cpu_result[b]), &(h_hipblas_result_device[b])), hipblas_error_device); } } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasNrm2BatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, batch_count, d_hipblas_result, resultType, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasNrm2BatchedExModel{}.log_args(std::cout, arg, gpu_time_used, nrm2_gflop_count(N), nrm2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_nrm2_batched_ex(Arguments arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t resultType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_batched_ex_template(arg); } else if(xType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_batched_ex_template(arg); } else if(xType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_nrm2_batched_ex_template(arg); } else if(xType == HIPBLAS_C_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_batched_ex_template(arg); } else if(xType == 
HIPBLAS_C_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_nrm2_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_nrm2_ex.hpp000066400000000000000000000164021434647641600226300ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasNrm2ExModel = ArgumentModel; inline void testname_nrm2_ex(const Arguments& arg, std::string& name) { hipblasNrm2ExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_nrm2_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasNrm2ExFn = FORTRAN ? hipblasNrm2ExFortran : hipblasNrm2Ex; int N = arg.N; int incx = arg.incx; hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t resultType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0) { device_vector d_hipblas_result_0(1); host_vector h_hipblas_result_0(1); hipblas_init_nan(h_hipblas_result_0.data(), 1); CHECK_HIP_ERROR( hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2ExFn( handle, N, nullptr, xType, incx, d_hipblas_result_0, resultType, executionType)); host_vector cpu_0(1); host_vector gpu_0(1); CHECK_HIP_ERROR(hipMemcpy(gpu_0, d_hipblas_result_0, sizeof(Tr), hipMemcpyDeviceToHost)); unit_check_general(1, 1, 1, cpu_0, gpu_0); return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = size_t(N) * incx; // Naming: dX is in GPU (device) memory. 
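// The timing block below excludes warm-up work by starting the timer only once
// iter == arg.cold_iters, and get_time_us_sync() synchronizes on the handle's stream so
// that kernel execution time is actually captured; log_args() then reports performance
// derived from nrm2_gflop_count(N) and nrm2_gbyte_count(N) along with the host/device errors.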
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); device_vector dx(sizeX); device_vector d_hipblas_result(1); Tr cpu_result, hipblas_result_host, hipblas_result_device; double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx, arg, N, incx, 0, 1, hipblas_client_alpha_sets_nan, true); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { // hipblasNrm2 accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2ExFn( handle, N, dx, xType, incx, d_hipblas_result, resultType, executionType)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasNrm2ExFn( handle, N, dx, xType, incx, &hipblas_result_host, resultType, executionType)); CHECK_HIP_ERROR( hipMemcpy(&hipblas_result_device, d_hipblas_result, sizeof(Tr), hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_nrm2(N, hx.data(), incx, &cpu_result); if(arg.unit_check) { unit_check_nrm2(cpu_result, hipblas_result_host, N); unit_check_nrm2(cpu_result, hipblas_result_device, N); } if(arg.norm_check) { hipblas_error_host = vector_norm_1(1, 1, &cpu_result, &hipblas_result_host); hipblas_error_device = vector_norm_1(1, 1, &cpu_result, &hipblas_result_device); } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasNrm2ExFn( handle, N, dx, xType, incx, d_hipblas_result, resultType, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasNrm2ExModel{}.log_args(std::cout, arg, gpu_time_used, nrm2_gflop_count(N), nrm2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_nrm2_ex(Arguments arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t resultType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_ex_template(arg); } else if(xType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_ex_template(arg); } else if(xType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_nrm2_ex_template(arg); } else if(xType == HIPBLAS_C_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_ex_template(arg); } else if(xType == HIPBLAS_C_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_nrm2_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_nrm2_strided_batched.hpp000066400000000000000000000164151434647641600253300ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 
2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasNrm2StridedBatchedModel = ArgumentModel; inline void testname_nrm2_strided_batched(const Arguments& arg, std::string& name) { hipblasNrm2StridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_nrm2_strided_batched(const Arguments& arg) { using Tr = real_t; bool FORTRAN = arg.fortran; auto hipblasNrm2StridedBatchedFn = FORTRAN ? hipblasNrm2StridedBatched : hipblasNrm2StridedBatched; int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride stridex = size_t(N) * incx * stride_scale; size_t sizeX = stridex * batch_count; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(1, batch_count)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2StridedBatchedFn( handle, N, nullptr, incx, stridex, batch_count, d_hipblas_result_0)); if(batch_count > 0) { host_vector cpu_0(batch_count); host_vector gpu_0(batch_count); CHECK_HIP_ERROR(hipMemcpy( gpu_0, d_hipblas_result_0, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector h_hipblas_result_host(batch_count); host_vector h_hipblas_result_device(batch_count); host_vector h_cpu_result(batch_count); device_vector dx(sizeX); device_vector d_hipblas_result(batch_count); double gpu_time_used; double hipblas_error_host = 0, hipblas_error_device = 0; // Initial Data on CPU hipblas_init_vector( hx, arg, N, incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { // hipblasNrm2 accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2StridedBatchedFn( handle, N, dx, incx, stridex, batch_count, d_hipblas_result)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasNrm2StridedBatchedFn( handle, N, dx, incx, stridex, batch_count, h_hipblas_result_host)); CHECK_HIP_ERROR(hipMemcpy(h_hipblas_result_device, d_hipblas_result, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_nrm2(N, hx.data() + b * stridex, incx, &(h_cpu_result[b])); } if(arg.unit_check) { unit_check_nrm2(batch_count, h_cpu_result, h_hipblas_result_host, N); unit_check_nrm2(batch_count, h_cpu_result, h_hipblas_result_device, N); } if(arg.norm_check) { for(int b = 0; b < batch_count; b++) { hipblas_error_host = std::max(vector_norm_1(1, 1, &(h_cpu_result[b]), &(h_hipblas_result_host[b])), hipblas_error_host); hipblas_error_device = std::max( vector_norm_1(1, 1, &(h_cpu_result[b]), &(h_hipblas_result_device[b])), hipblas_error_device); } } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasNrm2StridedBatchedFn( handle, N, dx, incx, stridex, batch_count, d_hipblas_result)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasNrm2StridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, nrm2_gflop_count(N), nrm2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_nrm2_strided_batched_ex.hpp000066400000000000000000000260041434647641600260170ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasNrm2StridedBatchedExModel = ArgumentModel; inline void testname_nrm2_strided_batched_ex(const Arguments& arg, std::string& name) { hipblasNrm2StridedBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_nrm2_strided_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasNrm2StridedBatchedExFn = FORTRAN ? hipblasNrm2StridedBatchedExFortran : hipblasNrm2StridedBatchedEx; int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; hipblasStride stridex = size_t(N) * incx * stride_scale; size_t sizeX = stridex * batch_count; hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t resultType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || incx <= 0 || batch_count <= 0) { device_vector d_hipblas_result_0(std::max(batch_count, 1)); host_vector h_hipblas_result_0(std::max(1, batch_count)); hipblas_init_nan(h_hipblas_result_0.data(), std::max(1, batch_count)); CHECK_HIP_ERROR(hipMemcpy(d_hipblas_result_0, h_hipblas_result_0, sizeof(Tr) * std::max(1, batch_count), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2StridedBatchedExFortran(handle, N, nullptr, xType, incx, stridex, batch_count, d_hipblas_result_0, resultType, executionType)); if(batch_count > 0) { // TODO: error in rocBLAS - only setting the first element to 0, not for all batches // host_vector cpu_0(batch_count); // host_vector gpu_0(batch_count); // CHECK_HIP_ERROR(hipMemcpy( // gpu_0, d_hipblas_result_0, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); // unit_check_general(1, batch_count, 1, cpu_0, gpu_0); } return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector h_hipblas_result_host(batch_count); host_vector h_hipblas_result_device(batch_count); host_vector h_cpu_result(batch_count); device_vector dx(sizeX); device_vector d_hipblas_result(batch_count); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector( hx, arg, N, incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { // hipblasNrm2 accept both dev/host pointer for the scalar CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasNrm2StridedBatchedExFn(handle, N, dx, xType, incx, stridex, batch_count, d_hipblas_result, resultType, executionType)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasNrm2StridedBatchedExFn(handle, N, dx, xType, incx, stridex, batch_count, h_hipblas_result_host, resultType, executionType)); CHECK_HIP_ERROR(hipMemcpy(h_hipblas_result_device, d_hipblas_result, sizeof(Tr) * batch_count, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_nrm2(N, hx.data() + b * stridex, incx, &(h_cpu_result[b])); } if(arg.unit_check) { unit_check_nrm2(batch_count, h_cpu_result, h_hipblas_result_host, N); unit_check_nrm2(batch_count, h_cpu_result, h_hipblas_result_device, N); } if(arg.norm_check) { for(int b = 0; b < batch_count; b++) { hipblas_error_host = std::max(vector_norm_1(1, 1, &(h_cpu_result[b]), &(h_hipblas_result_host[b])), hipblas_error_host); hipblas_error_device = std::max( vector_norm_1(1, 1, &(h_cpu_result[b]), &(h_hipblas_result_device[b])), hipblas_error_device); } } } // end of if unit/norm check if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasNrm2StridedBatchedExFn(handle, N, dx, xType, incx, stridex, batch_count, d_hipblas_result, resultType, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasNrm2StridedBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, nrm2_gflop_count(N), nrm2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_nrm2_strided_batched_ex(Arguments arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t resultType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(xType == HIPBLAS_R_16F && resultType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_32F && resultType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_R_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_nrm2_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_C_32F && resultType == 
HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_nrm2_strided_batched_ex_template(arg); } else if(xType == HIPBLAS_C_64F && resultType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_nrm2_strided_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_rot.hpp000066400000000000000000000162711434647641600220660ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotModel = ArgumentModel; inline void testname_rot(const Arguments& arg, std::string& name) { hipblasRotModel{}.test_name(arg, name); } template hipblasStatus_t testing_rot(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotFn = FORTRAN ? hipblasRot : hipblasRot; int N = arg.N; int incx = arg.incx; int incy = arg.incy; const U rel_error = std::numeric_limits::epsilon() * 1000; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0) { CHECK_HIPBLAS_ERROR( hipblasRotFn(handle, N, nullptr, incx, nullptr, incy, nullptr, nullptr)); return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; size_t size_x = N * size_t(abs_incx); size_t size_y = N * size_t(abs_incy); if(!size_x) size_x = 1; if(!size_y) size_y = 1; device_vector dx(size_x); device_vector dy(size_y); device_vector dc(1); device_vector ds(1); // Initial Data on CPU host_vector hx(size_x); host_vector hy(size_y); host_vector hc(1); host_vector hs(1); // Random alpha (0 - 10) host_vector alpha(1); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_never_set_nan, false); hipblas_init_vector(alpha, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); // cos and sin of alpha (in rads) hc[0] = cos(alpha[0]); hs[0] = sin(alpha[0]); // CPU BLAS reference data host_vector cx = hx; host_vector cy = hy; cblas_rot(N, cx.data(), incx, cy.data(), incy, *hc, *hs); if(arg.unit_check || arg.norm_check) { // Test host { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasRotFn(handle, N, dx, incx, dy, incy, hc, hs)); host_vector rx(size_x); host_vector ry(size_y); CHECK_HIP_ERROR(hipMemcpy(rx, dx, sizeof(T) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(ry, dy, sizeof(T) * size_y, hipMemcpyDeviceToHost)); if(arg.unit_check) { near_check_general(1, N, abs_incx, cx.data(), rx.data(), double(rel_error)); near_check_general(1, N, abs_incy, cy.data(), ry.data(), double(rel_error)); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incx, cx, rx); hipblas_error_host += norm_check_general('F', 1, N, abs_incy, cy, ry); } } // Test device { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(V), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasRotFn(handle, N, dx, incx, dy, incy, dc, ds)); host_vector rx(size_x); host_vector ry(size_y); CHECK_HIP_ERROR(hipMemcpy(rx, dx, sizeof(T) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(ry, dy, sizeof(T) * size_y, hipMemcpyDeviceToHost)); if(arg.unit_check) { near_check_general(1, N, abs_incx, cx.data(), rx.data(), double(rel_error)); near_check_general(1, N, abs_incy, cy.data(), ry.data(), double(rel_error)); } if(arg.norm_check) { hipblas_error_device = norm_check_general('F', 1, N, abs_incx, cx, rx); hipblas_error_device += norm_check_general('F', 1, N, abs_incy, cy, ry); } } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(V), hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotFn(handle, N, dx, incx, dy, incy, dc, ds)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
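/* Editorial note (comment added by the editor, not in the original source):
   the loop above runs arg.cold_iters warm-up iterations before the timer is
   started, so gpu_time_used covers only the arg.iters timed hipblasRot calls.
   The log_args call below reports that elapsed time together with the model's
   nominal flop/byte counts and the host/device pointer-mode errors computed
   in the check section earlier. */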
hipblasRotModel{}.log_args(std::cout, arg, gpu_time_used, rot_gflop_count(N), rot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rot_batched.hpp000066400000000000000000000215721434647641600235400ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotBatchedModel = ArgumentModel; inline void testname_rot_batched(const Arguments& arg, std::string& name) { hipblasRotBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rot_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotBatchedFn = FORTRAN ? hipblasRotBatched : hipblasRotBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; const U rel_error = std::numeric_limits::epsilon() * 1000; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR((hipblasRotBatchedFn( handle, N, nullptr, incx, nullptr, incy, nullptr, nullptr, batch_count))); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; double gpu_time_used, hipblas_error_host, hipblas_error_device; device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector dc(1); device_vector ds(1); // Initial Data on CPU host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_vector hc(1); host_vector hs(1); // Random alpha (0 - 10) host_vector alpha(1); hipblas_init_vector(hx, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hy, arg, hipblas_client_never_set_nan, false); hipblas_init_vector(alpha, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); // cos and sin of alpha (in rads) hc[0] = cos(alpha[0]); hs[0] = sin(alpha[0]); // CPU BLAS reference data host_batch_vector cx(N, incx, batch_count); host_batch_vector cy(N, incy, batch_count); cx.copy_from(hx); cy.copy_from(hy); // cblas_rotg(cx, cy, hc, hs); // cx[0] = hx[0]; // cy[0] = hy[0]; for(int b = 0; b < batch_count; b++) { cblas_rot(N, cx[b], incx, cy[b], incy, *hc, *hs); } if(arg.unit_check || arg.norm_check) { // Test host { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR((hipblasRotBatchedFn(handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, hc, hs, batch_count))); host_batch_vector rx(N, incx, batch_count); host_batch_vector ry(N, incy, batch_count); CHECK_HIP_ERROR(rx.transfer_from(dx)); CHECK_HIP_ERROR(ry.transfer_from(dy)); if(arg.unit_check) { for(int b = 0; b < batch_count; b++) { near_check_general(1, N, abs_incx, cx[b], rx[b], rel_error); near_check_general(1, N, abs_incy, cy[b], ry[b], rel_error); } } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incx, cx, rx, batch_count); hipblas_error_host += norm_check_general('F', 1, N, abs_incy, cy, ry, batch_count); } } // Test device { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(V), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR((hipblasRotBatchedFn(handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dc, ds, batch_count))); host_batch_vector rx(N, incx, batch_count); host_batch_vector ry(N, incy, batch_count); CHECK_HIP_ERROR(rx.transfer_from(dx)); CHECK_HIP_ERROR(ry.transfer_from(dy)); if(arg.unit_check) { for(int b = 0; b < batch_count; b++) { near_check_general(1, N, abs_incx, cx[b], rx[b], rel_error); near_check_general(1, N, abs_incy, cy[b], ry[b], rel_error); } } if(arg.norm_check) { hipblas_error_device = norm_check_general('F', 1, N, abs_incx, cx, rx, batch_count); hipblas_error_device += norm_check_general('F', 1, N, abs_incy, cy, ry, batch_count); } } } if(arg.timing) { CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(V), hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasRotBatchedFn(handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dc, ds, 
batch_count))); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotBatchedModel{}.log_args(std::cout, arg, gpu_time_used, rot_gflop_count(N), rot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rot_batched_ex.hpp000066400000000000000000000273641434647641600242410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotBatchedExModel = ArgumentModel; inline void testname_rot_batched_ex(const Arguments& arg, std::string& name) { hipblasRotBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rot_batched_ex_template(const Arguments& arg) { using Ty = Tx; bool FORTRAN = arg.fortran; auto hipblasRotBatchedExFn = FORTRAN ? hipblasRotBatchedExFortran : hipblasRotBatchedEx; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t csType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasRotBatchedExFn(handle, N, nullptr, xType, incx, nullptr, yType, incy, nullptr, nullptr, csType, batch_count, executionType)); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; double gpu_time_used, hipblas_error_host, hipblas_error_device; host_batch_vector hx_host(N, incx, batch_count); host_batch_vector hy_host(N, incy, batch_count); host_batch_vector hx_device(N, incx, batch_count); host_batch_vector hy_device(N, incy, batch_count); host_batch_vector hx_cpu(N, incx, batch_count); host_batch_vector hy_cpu(N, incy, batch_count); host_vector hc(1); host_vector hs(1); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector dc(1); device_vector ds(1); hipblas_init_vector(hx_host, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hy_host, arg, hipblas_client_never_set_nan, false); hipblas_init_vector(hc, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); hipblas_init_vector(hs, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); hx_device.copy_from(hx_host); hx_cpu.copy_from(hx_host); hy_device.copy_from(hy_host); hy_cpu.copy_from(hy_host); CHECK_HIP_ERROR(dx.transfer_from(hx_host)); CHECK_HIP_ERROR(dy.transfer_from(hy_host)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(Tcs), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(Tcs), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { // HIPBLAS CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasRotBatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, hc, hs, csType, batch_count, executionType)); CHECK_HIP_ERROR(hx_host.transfer_from(dx)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dx.transfer_from(hx_device)); CHECK_HIP_ERROR(dy.transfer_from(hy_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasRotBatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, dc, ds, csType, batch_count, executionType)); CHECK_HIP_ERROR(hx_device.transfer_from(dx)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); // CBLAS for(int b = 0; b < batch_count; b++) { cblas_rot(N, hx_cpu[b], incx, hy_cpu[b], incy, *hc, *hs); } if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incx, hx_cpu, hx_host); unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incx, hx_cpu, hx_device); unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incx, hx_cpu, hx_host, batch_count); hipblas_error_host += norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, N, abs_incx, hx_cpu, hx_device, batch_count); hipblas_error_device += norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotBatchedExFn(handle, N, dx.ptr_on_device(), xType, incx, dy.ptr_on_device(), yType, incy, dc, ds, csType, batch_count, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, rot_gflop_count(N), rot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline 
hipblasStatus_t testing_rot_batched_ex(Arguments arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t csType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_16B && csType == HIPBLAS_R_16B) { status = testing_rot_batched_ex_template(arg); } else if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_16F && csType == HIPBLAS_R_16F) { status = testing_rot_batched_ex_template(arg); } else if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_32F && csType == HIPBLAS_R_32F) { status = testing_rot_batched_ex_template(arg); } else if(executionType == HIPBLAS_R_64F && xType == yType && xType == HIPBLAS_R_64F && csType == HIPBLAS_R_64F) { status = testing_rot_batched_ex_template(arg); } else if(executionType == HIPBLAS_C_32F && xType == yType && xType == HIPBLAS_C_32F && csType == HIPBLAS_R_32F) { status = testing_rot_batched_ex_template(arg); } else if(executionType == HIPBLAS_C_32F && xType == yType && xType == HIPBLAS_C_32F && csType == HIPBLAS_C_32F) { status = testing_rot_batched_ex_template(arg); } else if(executionType == HIPBLAS_C_64F && xType == yType && xType == HIPBLAS_C_64F && csType == HIPBLAS_R_64F) { status = testing_rot_batched_ex_template( arg); } else if(executionType == HIPBLAS_C_64F && xType == yType && xType == HIPBLAS_C_64F && csType == HIPBLAS_C_64F) { status = testing_rot_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_rot_ex.hpp000066400000000000000000000233701434647641600225600ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotExModel = ArgumentModel; inline void testname_rot_ex(const Arguments& arg, std::string& name) { hipblasRotExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rot_ex_template(const Arguments& arg) { using Ty = Tx; bool FORTRAN = arg.fortran; auto hipblasRotExFn = FORTRAN ? 
hipblasRotExFortran : hipblasRotEx; int N = arg.N; int incx = arg.incx; int incy = arg.incy; hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t csType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0) { CHECK_HIPBLAS_ERROR(hipblasRotExFn(handle, N, nullptr, xType, incx, nullptr, yType, incy, nullptr, nullptr, csType, executionType)); return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t size_x = N * size_t(abs_incx); size_t size_y = N * size_t(abs_incy); if(!size_x) size_x = 1; if(!size_y) size_y = 1; device_vector dx(size_x); device_vector dy(size_y); device_vector dc(1); device_vector ds(1); // Initial Data on CPU host_vector hx_host(size_x); host_vector hy_host(size_y); host_vector hx_device(size_x); host_vector hy_device(size_y); host_vector hx_cpu(size_x); host_vector hy_cpu(size_y); host_vector hc(1); host_vector hs(1); // Random alpha (0 - 10) host_vector alpha(1); hipblas_init_vector(hx_host, arg, N, abs_incx, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_vector(hy_host, arg, N, abs_incy, 0, 1, hipblas_client_never_set_nan, false); hipblas_init_vector(alpha, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); hipblas_init_vector(hc, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); hipblas_init_vector(hs, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); // // cos and sin of alpha (in rads) // hc[0] = cos(alpha[0]); // hs[0] = sin(alpha[0]); // CPU BLAS reference data hx_cpu = hx_device = hx_host; hy_cpu = hy_device = hy_host; CHECK_HIP_ERROR(hipMemcpy(dx, hx_host, sizeof(Tx) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy_host, sizeof(Ty) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(Tcs), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(Tcs), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { // HIPBLAS CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasRotExFn( handle, N, dx, xType, incx, dy, yType, incy, hc, hs, csType, executionType)); CHECK_HIP_ERROR(hipMemcpy(hx_host, dx, sizeof(Tx) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy_host, dy, sizeof(Ty) * size_y, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dx, hx_device, sizeof(Tx) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy_device, sizeof(Ty) * size_y, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasRotExFn( handle, N, dx, xType, incx, dy, yType, incy, dc, ds, csType, executionType)); CHECK_HIP_ERROR(hipMemcpy(hx_device, dx, sizeof(Tx) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy_device, dy, sizeof(Ty) * size_y, hipMemcpyDeviceToHost)); // CBLAS cblas_rot(N, hx_cpu.data(), incx, hy_cpu.data(), incy, *hc, *hs); if(arg.unit_check) { unit_check_general(1, N, abs_incx, hx_cpu, hx_host); unit_check_general(1, N, abs_incy, hy_cpu, hy_host); unit_check_general(1, N, abs_incx, hx_cpu, hx_device); unit_check_general(1, N, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incx, hx_cpu, hx_host); hipblas_error_host += norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_host); hipblas_error_device 
= norm_check_general('F', 1, N, abs_incx, hx_cpu, hx_device); hipblas_error_device += norm_check_general('F', 1, N, abs_incy, hy_cpu, hy_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotExFn( handle, N, dx, xType, incx, dy, yType, incy, dc, ds, csType, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotExModel{}.log_args(std::cout, arg, gpu_time_used, rot_gflop_count(N), rot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_rot_ex(Arguments arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t csType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_16B && csType == HIPBLAS_R_16B) { status = testing_rot_ex_template(arg); } else if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_16F && csType == HIPBLAS_R_16F) { status = testing_rot_ex_template(arg); } else if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_32F && csType == HIPBLAS_R_32F) { status = testing_rot_ex_template(arg); } else if(executionType == HIPBLAS_R_64F && xType == yType && xType == HIPBLAS_R_64F && csType == HIPBLAS_R_64F) { status = testing_rot_ex_template(arg); } else if(executionType == HIPBLAS_C_32F && xType == yType && xType == HIPBLAS_C_32F && csType == HIPBLAS_R_32F) { status = testing_rot_ex_template(arg); } else if(executionType == HIPBLAS_C_32F && xType == yType && xType == HIPBLAS_C_32F && csType == HIPBLAS_C_32F) { status = testing_rot_ex_template(arg); } else if(executionType == HIPBLAS_C_64F && xType == yType && xType == HIPBLAS_C_64F && csType == HIPBLAS_R_64F) { status = testing_rot_ex_template(arg); } else if(executionType == HIPBLAS_C_64F && xType == yType && xType == HIPBLAS_C_64F && csType == HIPBLAS_C_64F) { status = testing_rot_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_rot_strided_batched.hpp000066400000000000000000000220401434647641600252450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotStridedBatchedModel = ArgumentModel; inline void testname_rot_strided_batched(const Arguments& arg, std::string& name) { hipblasRotStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rot_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotStridedBatchedFn = FORTRAN ? hipblasRotStridedBatched : hipblasRotStridedBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stride_x = size_t(N) * abs_incx * stride_scale; hipblasStride stride_y = size_t(N) * abs_incy * stride_scale; const U rel_error = std::numeric_limits::epsilon() * 1000; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR((hipblasRotStridedBatchedFn(handle, N, nullptr, incx, stride_x, nullptr, incy, stride_y, nullptr, nullptr, batch_count))); return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; size_t size_x = N * size_t(abs_incx) + size_t(stride_x) * size_t(batch_count - 1); size_t size_y = N * size_t(abs_incy) + size_t(stride_y) * size_t(batch_count - 1); if(!size_x) size_x = 1; if(!size_y) size_y = 1; device_vector dx(size_x); device_vector dy(size_y); device_vector dc(1); device_vector ds(1); // Initial Data on CPU host_vector hx(size_x); host_vector hy(size_y); host_vector hc(1); host_vector hs(1); // Random alpha (0 - 10) host_vector alpha(1); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hy, arg, N, abs_incy, stride_y, batch_count, hipblas_client_never_set_nan, false); hipblas_init_vector(alpha, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); // cos and sin of alpha (in rads) hc[0] = cos(alpha[0]); hs[0] = sin(alpha[0]); // CPU BLAS reference data host_vector cx = hx; host_vector cy = hy; // cblas_rotg(cx, cy, hc, hs); // cx[0] = hx[0]; // cy[0] = hy[0]; for(int b = 0; b < batch_count; b++) { cblas_rot( N, cx.data() + b * stride_x, incx, cy.data() + b * stride_y, incy, *hc, *hs); } if(arg.unit_check || arg.norm_check) { // Test host { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR((hipblasRotStridedBatchedFn( handle, N, dx, incx, stride_x, dy, incy, stride_y, hc, hs, batch_count))); host_vector rx(size_x); host_vector ry(size_y); CHECK_HIP_ERROR(hipMemcpy(rx, dx, sizeof(T) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(ry, dy, sizeof(T) * size_y, hipMemcpyDeviceToHost)); if(arg.unit_check) { near_check_general(1, N, batch_count, abs_incx, stride_x, cx, rx, rel_error); near_check_general(1, N, batch_count, abs_incy, stride_y, cy, ry, 
rel_error); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incx, stride_x, cx, rx, batch_count); hipblas_error_host += norm_check_general('F', 1, N, abs_incy, stride_y, cy, ry, batch_count); } } // Test device { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(V), hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR((hipblasRotStridedBatchedFn( handle, N, dx, incx, stride_x, dy, incy, stride_y, dc, ds, batch_count))); host_vector rx(size_x); host_vector ry(size_y); CHECK_HIP_ERROR(hipMemcpy(rx, dx, sizeof(T) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(ry, dy, sizeof(T) * size_y, hipMemcpyDeviceToHost)); if(arg.unit_check) { near_check_general(1, N, batch_count, abs_incx, stride_x, cx, rx, rel_error); near_check_general(1, N, batch_count, abs_incy, stride_y, cy, ry, rel_error); } if(arg.norm_check) { hipblas_error_device = norm_check_general('F', 1, N, abs_incx, stride_x, cx, rx, batch_count); hipblas_error_device += norm_check_general('F', 1, N, abs_incy, stride_y, cy, ry, batch_count); } } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(V), hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasRotStridedBatchedFn( handle, N, dx, incx, stride_x, dy, incy, stride_y, dc, ds, batch_count))); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, rot_gflop_count(N), rot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rot_strided_batched_ex.hpp000066400000000000000000000327021434647641600257470ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotStridedBatchedExModel = ArgumentModel; inline void testname_rot_strided_batched_ex(const Arguments& arg, std::string& name) { hipblasRotStridedBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rot_strided_batched_ex_template(const Arguments& arg) { using Ty = Tx; bool FORTRAN = arg.fortran; auto hipblasRotStridedBatchedExFn = FORTRAN ? hipblasRotStridedBatchedExFortran : hipblasRotStridedBatchedEx; int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stridex = N * abs_incx * stride_scale; hipblasStride stridey = N * abs_incy * stride_scale; size_t size_x = stridex * batch_count; size_t size_y = stridey * batch_count; if(!size_x) size_x = 1; if(!size_y) size_y = 1; hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t csType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasRotStridedBatchedExFn(handle, N, nullptr, xType, incx, stridex, nullptr, yType, incy, stridey, nullptr, nullptr, csType, batch_count, executionType)); return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; device_vector dx(size_x); device_vector dy(size_y); device_vector dc(1); device_vector ds(1); // Initial Data on CPU host_vector hx_host(size_x); host_vector hy_host(size_y); host_vector hx_device(size_x); host_vector hy_device(size_y); host_vector hx_cpu(size_x); host_vector hy_cpu(size_y); host_vector hc(1); host_vector hs(1); hipblas_init_vector( hx_host, arg, N, abs_incx, stridex, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hy_host, arg, N, abs_incy, stridey, batch_count, hipblas_client_never_set_nan, false); hipblas_init_vector(hc, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); hipblas_init_vector(hs, arg, 1, 1, 0, 1, hipblas_client_never_set_nan, false); hx_cpu = hx_device = hx_host; hy_cpu = hy_device = hy_host; CHECK_HIP_ERROR(hipMemcpy(dx, hx_host, sizeof(Tx) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy_host, sizeof(Ty) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(Tcs), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(Tcs), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasRotStridedBatchedExFn(handle, N, dx, xType, incx, stridex, dy, yType, incy, stridey, hc, hs, csType, batch_count, executionType)); CHECK_HIP_ERROR(hipMemcpy(hx_host, dx, sizeof(Tx) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy_host, dy, sizeof(Ty) * size_y, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dx, hx_device, sizeof(Tx) * size_x, 
hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy_device, sizeof(Ty) * size_y, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasRotStridedBatchedExFn(handle, N, dx, xType, incx, stridex, dy, yType, incy, stridey, dc, ds, csType, batch_count, executionType)); CHECK_HIP_ERROR(hipMemcpy(hx_device, dx, sizeof(Tx) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy_device, dy, sizeof(Ty) * size_y, hipMemcpyDeviceToHost)); for(int b = 0; b < batch_count; b++) { cblas_rot( N, hx_cpu.data() + b * stridex, incx, hy_cpu.data() + b * stridey, incy, *hc, *hs); } if(arg.unit_check) { unit_check_general(1, N, batch_count, abs_incx, stridex, hx_cpu, hx_host); unit_check_general(1, N, batch_count, abs_incy, stridey, hy_cpu, hy_host); unit_check_general(1, N, batch_count, abs_incx, stridex, hx_cpu, hx_device); unit_check_general(1, N, batch_count, abs_incy, stridey, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, N, abs_incx, stridex, hx_cpu, hx_host, batch_count); hipblas_error_host += norm_check_general( 'F', 1, N, abs_incy, stridey, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, N, abs_incx, stridex, hx_cpu, hx_device, batch_count); hipblas_error_device += norm_check_general( 'F', 1, N, abs_incy, stridey, hy_cpu, hy_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotStridedBatchedExFn(handle, N, dx, xType, incx, stridex, dy, yType, incy, stridey, dc, ds, csType, batch_count, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotStridedBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, rot_gflop_count(N), rot_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_rot_strided_batched_ex(Arguments arg) { hipblasDatatype_t xType = arg.a_type; hipblasDatatype_t yType = arg.b_type; hipblasDatatype_t csType = arg.c_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_16B && csType == HIPBLAS_R_16B) { status = testing_rot_strided_batched_ex_template(arg); } else if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_16F && csType == HIPBLAS_R_16F) { status = testing_rot_strided_batched_ex_template(arg); } else if(executionType == HIPBLAS_R_32F && xType == yType && xType == HIPBLAS_R_32F && csType == HIPBLAS_R_32F) { status = testing_rot_strided_batched_ex_template(arg); } else if(executionType == HIPBLAS_R_64F && xType == yType && xType == HIPBLAS_R_64F && csType == HIPBLAS_R_64F) { status = testing_rot_strided_batched_ex_template(arg); } else if(executionType == HIPBLAS_C_32F && xType == yType && xType == HIPBLAS_C_32F && csType == HIPBLAS_R_32F) { status = testing_rot_strided_batched_ex_template(arg); } else if(executionType == HIPBLAS_C_32F && xType == yType && xType == HIPBLAS_C_32F && csType == HIPBLAS_C_32F) { status = testing_rot_strided_batched_ex_template(arg); } else if(executionType == HIPBLAS_C_64F && xType == yType && xType == HIPBLAS_C_64F && 
csType == HIPBLAS_R_64F) { status = testing_rot_strided_batched_ex_template(arg); } else if(executionType == HIPBLAS_C_64F && xType == yType && xType == HIPBLAS_C_64F && csType == HIPBLAS_C_64F) { status = testing_rot_strided_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_rotg.hpp000066400000000000000000000142161434647641600222320ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotgModel = ArgumentModel<>; inline void testname_rotg(const Arguments& arg, std::string& name) { hipblasRotgModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotg(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasRotgFn = FORTRAN ? 
hipblasRotg : hipblasRotg; double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); const U rel_error = std::numeric_limits::epsilon() * 1000; host_vector ha(1); host_vector hb(1); host_vector hc(1); host_vector hs(1); // Initial data on CPU hipblas_init_vector(ha, arg, 1, 1, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hb, arg, 1, 1, 0, 1, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hc, arg, 1, 1, 0, 1, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hs, arg, 1, 1, 0, 1, hipblas_client_alpha_sets_nan, false); // CPU BLAS host_vector ca = ha; host_vector cb = hb; host_vector cc = hc; host_vector cs = hs; // result hipBLAS device host_vector ra = ha; host_vector rb = hb; host_vector rc = hc; host_vector rs = hs; device_vector da(1); device_vector db(1); device_vector dc(1); device_vector ds(1); CHECK_HIP_ERROR(hipMemcpy(da, ha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(db, hb, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(U), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR((hipblasRotgFn(handle, ha, hb, hc, hs))); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR((hipblasRotgFn(handle, da, db, dc, ds))); CHECK_HIP_ERROR(hipMemcpy(ra, da, sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(rb, db, sizeof(T), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(rc, dc, sizeof(U), hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(rs, ds, sizeof(T), hipMemcpyDeviceToHost)); cblas_rotg(ca, cb, cc, cs); if(arg.unit_check) { near_check_general(1, 1, 1, ca.data(), ha.data(), rel_error); near_check_general(1, 1, 1, cb.data(), hb.data(), rel_error); near_check_general(1, 1, 1, cc.data(), hc.data(), rel_error); near_check_general(1, 1, 1, cs.data(), hs.data(), rel_error); near_check_general(1, 1, 1, ca.data(), ra.data(), rel_error); near_check_general(1, 1, 1, cb.data(), rb.data(), rel_error); near_check_general(1, 1, 1, cc.data(), rc.data(), rel_error); near_check_general(1, 1, 1, cs.data(), rs.data(), rel_error); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, 1, 1, ca, ha); hipblas_error_host += norm_check_general('F', 1, 1, 1, cb, hb); hipblas_error_host += norm_check_general('F', 1, 1, 1, cc, hc); hipblas_error_host += norm_check_general('F', 1, 1, 1, cs, hs); hipblas_error_device = norm_check_general('F', 1, 1, 1, ca, ra); hipblas_error_device += norm_check_general('F', 1, 1, 1, cb, rb); hipblas_error_device += norm_check_general('F', 1, 1, 1, cc, rc); hipblas_error_device += norm_check_general('F', 1, 1, 1, cs, rs); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasRotgFn(handle, da, db, dc, ds))); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotgModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
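/* Editor's note (illustrative addition, not original repository code): the rotg
 * test above exercises hipblasRotg in both pointer modes. The sketch below shows
 * that same host/device pointer-mode pattern with the plain C API, assuming the
 * standard hipBLAS entry points (hipblasCreate, hipblasSetPointerMode,
 * hipblasSrotg) and the ROCm 5.5-era <hipblas.h> header; error checking is
 * omitted for brevity and the function name is ours. */
#include <cstdio>
#include <hip/hip_runtime.h>
#include <hipblas.h>

static void rotg_pointer_mode_sketch()
{
    hipblasHandle_t handle;
    hipblasCreate(&handle);

    // Host pointer mode: the four scalars live in host memory and are updated in place.
    float a = 1.0f, b = 2.0f, c = 0.0f, s = 0.0f;
    hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST);
    hipblasSrotg(handle, &a, &b, &c, &s);
    printf("host mode:   r=%f c=%f s=%f\n", a, c, s);

    // Device pointer mode: the same scalars must reside in device memory instead.
    float  h[4] = {1.0f, 2.0f, 0.0f, 0.0f};
    float* d;
    hipMalloc(&d, sizeof(h));
    hipMemcpy(d, h, sizeof(h), hipMemcpyHostToDevice);
    hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE);
    hipblasSrotg(handle, d + 0, d + 1, d + 2, d + 3);
    hipMemcpy(h, d, sizeof(h), hipMemcpyDeviceToHost);
    printf("device mode: r=%f c=%f s=%f\n", h[0], h[2], h[3]);

    hipFree(d);
    hipblasDestroy(handle);
}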
hipBLAS-rocm-5.5.1/clients/include/testing_rotg_batched.hpp000066400000000000000000000175341434647641600237120ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotgBatchedModel = ArgumentModel; inline void testname_rotg_batched(const Arguments& arg, std::string& name) { hipblasRotgBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotg_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasRotgBatchedFn = FORTRAN ? 
hipblasRotgBatched : hipblasRotgBatched; int batch_count = arg.batch_count; const U rel_error = std::numeric_limits::epsilon() * 1000; // check to prevent undefined memory allocation error if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } else if(batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU // host vectors for hipblas host result host_batch_vector ha(1, 1, batch_count); host_batch_vector hb(1, 1, batch_count); host_batch_vector hc(1, 1, batch_count); host_batch_vector hs(1, 1, batch_count); // host vectors for cblas host_batch_vector ca(1, 1, batch_count); host_batch_vector cb(1, 1, batch_count); host_batch_vector cc(1, 1, batch_count); host_batch_vector cs(1, 1, batch_count); // host vectors for hipblas device result host_batch_vector ra(1, 1, batch_count); host_batch_vector rb(1, 1, batch_count); host_batch_vector rc(1, 1, batch_count); host_batch_vector rs(1, 1, batch_count); // device vectors for hipblas device device_batch_vector da(1, 1, batch_count); device_batch_vector db(1, 1, batch_count); device_batch_vector dc(1, 1, batch_count); device_batch_vector ds(1, 1, batch_count); hipblas_init_vector(ha, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hb, arg, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hc, arg, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hs, arg, hipblas_client_alpha_sets_nan, false); ca.copy_from(ha); cb.copy_from(hb); cc.copy_from(hc); cs.copy_from(hs); ra.copy_from(ha); rb.copy_from(hb); rc.copy_from(hc); rs.copy_from(hs); CHECK_HIP_ERROR(da.transfer_from(ha)); CHECK_HIP_ERROR(db.transfer_from(hb)); CHECK_HIP_ERROR(dc.transfer_from(hc)); CHECK_HIP_ERROR(ds.transfer_from(hs)); if(arg.unit_check || arg.norm_check) { // hipBLAS // test host CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasRotgBatchedFn(handle, ha, hb, hc, hs, batch_count)); // test device CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR((hipblasRotgBatchedFn(handle, da.ptr_on_device(), db.ptr_on_device(), dc.ptr_on_device(), ds.ptr_on_device(), batch_count))); CHECK_HIP_ERROR(ra.transfer_from(da)); CHECK_HIP_ERROR(rb.transfer_from(db)); CHECK_HIP_ERROR(rc.transfer_from(dc)); CHECK_HIP_ERROR(rs.transfer_from(ds)); // CBLAS for(int b = 0; b < batch_count; b++) { cblas_rotg(ca[b], cb[b], cc[b], cs[b]); } if(arg.unit_check) { for(int b = 0; b < batch_count; b++) { near_check_general(1, 1, 1, ca[b], ha[b], rel_error); near_check_general(1, 1, 1, cb[b], hb[b], rel_error); near_check_general(1, 1, 1, cc[b], hc[b], rel_error); near_check_general(1, 1, 1, cs[b], hs[b], rel_error); near_check_general(1, 1, 1, ca[b], ra[b], rel_error); near_check_general(1, 1, 1, cb[b], rb[b], rel_error); near_check_general(1, 1, 1, cc[b], rc[b], rel_error); near_check_general(1, 1, 1, cs[b], rs[b], rel_error); } } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, 1, 1, ca, ha, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, cb, hb, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, cc, hc, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, cs, hs, batch_count); hipblas_error_device = norm_check_general('F', 1, 1, 1, ca, ra, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, cb, rb, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 
1, cc, rc, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, cs, rs, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasRotgBatchedFn(handle, da.ptr_on_device(), db.ptr_on_device(), dc.ptr_on_device(), ds.ptr_on_device(), batch_count))); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotgBatchedModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rotg_strided_batched.hpp000066400000000000000000000202151434647641600254160ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotgStridedBatchedModel = ArgumentModel; inline void testname_rotg_strided_batched(const Arguments& arg, std::string& name) { hipblasRotgStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotg_strided_batched(const Arguments& arg) { using U = real_t; bool FORTRAN = arg.fortran; auto hipblasRotgStridedBatchedFn = FORTRAN ? 
hipblasRotgStridedBatched : hipblasRotgStridedBatched; double stride_scale = arg.stride_scale; hipblasStride stride_a = stride_scale; hipblasStride stride_b = stride_scale; hipblasStride stride_c = stride_scale; hipblasStride stride_s = stride_scale; int batch_count = arg.batch_count; const U rel_error = std::numeric_limits::epsilon() * 1000; // check to prevent undefined memory allocation error if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } else if(batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); size_t size_a = size_t(stride_a) * size_t(batch_count); size_t size_b = size_t(stride_b) * size_t(batch_count); size_t size_c = size_t(stride_c) * size_t(batch_count); size_t size_s = size_t(stride_s) * size_t(batch_count); host_vector ha(size_a); host_vector hb(size_b); host_vector hc(size_c); host_vector hs(size_s); // Initial data on CPU hipblas_init_vector(ha, arg, 1, 1, stride_a, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hb, arg, 1, 1, stride_b, batch_count, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hc, arg, 1, 1, stride_c, batch_count, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hs, arg, 1, 1, stride_s, batch_count, hipblas_client_alpha_sets_nan, false); // CPU_BLAS host_vector ca = ha; host_vector cb = hb; host_vector cc = hc; host_vector cs = hs; // result vector for hipBLAS device host_vector ra = ha; host_vector rb = hb; host_vector rc = hc; host_vector rs = hs; device_vector da(size_a); device_vector db(size_b); device_vector dc(size_c); device_vector ds(size_s); CHECK_HIP_ERROR(hipMemcpy(da, ha, sizeof(T) * size_a, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(db, hb, sizeof(T) * size_b, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dc, hc, sizeof(U) * size_c, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(ds, hs, sizeof(T) * size_s, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR((hipblasRotgStridedBatchedFn( handle, ha, stride_a, hb, stride_b, hc, stride_c, hs, stride_s, batch_count))); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR((hipblasRotgStridedBatchedFn( handle, da, stride_a, db, stride_b, dc, stride_c, ds, stride_s, batch_count))); CHECK_HIP_ERROR(hipMemcpy(ra, da, sizeof(T) * size_a, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(rb, db, sizeof(T) * size_b, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(rc, dc, sizeof(U) * size_c, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(rs, ds, sizeof(T) * size_s, hipMemcpyDeviceToHost)); for(int b = 0; b < batch_count; b++) { cblas_rotg(ca.data() + b * stride_a, cb.data() + b * stride_b, cc.data() + b * stride_c, cs.data() + b * stride_s); } if(arg.unit_check) { near_check_general(1, 1, batch_count, 1, stride_a, ca, ha, rel_error); near_check_general(1, 1, batch_count, 1, stride_b, cb, hb, rel_error); near_check_general(1, 1, batch_count, 1, stride_c, cc, hc, rel_error); near_check_general(1, 1, batch_count, 1, stride_s, cs, hs, rel_error); near_check_general(1, 1, batch_count, 1, stride_a, ca, ra, rel_error); near_check_general(1, 1, batch_count, 1, stride_b, cb, rb, rel_error); near_check_general(1, 1, batch_count, 1, stride_c, cc, rc, rel_error); near_check_general(1, 1, batch_count, 1, stride_s, cs, rs, rel_error); } if(arg.norm_check) { hipblas_error_host = 
norm_check_general('F', 1, 1, 1, stride_a, ca, ha, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, stride_b, cb, hb, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, stride_c, cc, hc, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, stride_s, cs, hs, batch_count); hipblas_error_device = norm_check_general('F', 1, 1, 1, stride_a, ca, ra, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, stride_b, cb, rb, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, stride_c, cc, rc, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, stride_s, cs, rs, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasRotgStridedBatchedFn( handle, da, stride_a, db, stride_b, dc, stride_c, ds, stride_s, batch_count))); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotgStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rotm.hpp000066400000000000000000000164351434647641600222450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotmModel = ArgumentModel; inline void testname_rotm(const Arguments& arg, std::string& name) { hipblasRotmModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotm(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotmFn = FORTRAN ? 
hipblasRotm : hipblasRotm; int N = arg.N; int incx = arg.incx; int incy = arg.incy; const T rel_error = std::numeric_limits::epsilon() * 1000; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0) { CHECK_HIPBLAS_ERROR(hipblasRotmFn(handle, N, nullptr, incx, nullptr, incy, nullptr)); return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t size_x = N * size_t(abs_incx); size_t size_y = N * size_t(abs_incy); if(!size_x) size_x = 1; if(!size_y) size_y = 1; device_vector dx(size_x); device_vector dy(size_y); device_vector dparam(5); // Initial Data on CPU host_vector hx(size_x); host_vector hy(size_y); host_vector hdata(4); host_vector hparam(5); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hdata, arg, 4, 1, 0, 1, hipblas_client_alpha_sets_nan, false); // CPU BLAS reference data cblas_rotmg(&hdata[0], &hdata[1], &hdata[2], &hdata[3], hparam); const int FLAG_COUNT = 4; const T FLAGS[FLAG_COUNT] = {-1, 0, 1, -2}; for(int i = 0; i < FLAG_COUNT; ++i) { if(arg.unit_check || arg.norm_check) { hparam[0] = FLAGS[i]; host_vector cx = hx; host_vector cy = hy; cblas_rotm(N, cx, incx, cy, incy, hparam); // Test host { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasRotmFn(handle, N, dx, incx, dy, incy, hparam)); host_vector rx(size_x); host_vector ry(size_y); CHECK_HIP_ERROR(hipMemcpy(rx, dx, sizeof(T) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(ry, dy, sizeof(T) * size_y, hipMemcpyDeviceToHost)); if(arg.unit_check) { near_check_general(1, N, abs_incx, cx.data(), rx.data(), rel_error); near_check_general(1, N, abs_incy, cy.data(), ry.data(), rel_error); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, N, abs_incx, cx, rx); hipblas_error_host += norm_check_general('F', 1, N, abs_incy, cy, ry); } } // Test device { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dparam, hparam, sizeof(T) * 5, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasRotmFn(handle, N, dx, incx, dy, incy, dparam)); host_vector rx(size_x); host_vector ry(size_y); CHECK_HIP_ERROR(hipMemcpy(rx, dx, sizeof(T) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(ry, dy, sizeof(T) * size_y, hipMemcpyDeviceToHost)); if(arg.unit_check) { near_check_general(1, N, abs_incx, cx.data(), rx.data(), rel_error); near_check_general(1, N, abs_incy, cy.data(), ry.data(), rel_error); } if(arg.norm_check) { hipblas_error_device = norm_check_general('F', 1, N, abs_incx, cx, rx); hipblas_error_device += norm_check_general('F', 1, N, abs_incy, cy, ry); } } } } if(arg.timing) { hparam[0] = 0; hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * 
size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dparam, hparam, sizeof(T) * 5, hipMemcpyHostToDevice)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotmFn(handle, N, dx, incx, dy, incy, dparam)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotmModel{}.log_args(std::cout, arg, gpu_time_used, rotm_gflop_count(N, hparam[0]), rotm_gbyte_count(N, hparam[0]), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rotm_batched.hpp000066400000000000000000000163141434647641600237130ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotmBatchedModel = ArgumentModel; inline void testname_rotm_batched(const Arguments& arg, std::string& name) { hipblasRotmBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotm_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotmBatchedFn = FORTRAN ? hipblasRotmBatched : hipblasRotmBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; const T rel_error = std::numeric_limits::epsilon() * 1000; hipblasLocalHandle handle(arg); // check to prevent undefined memory allocation error if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR( hipblasRotmBatchedFn(handle, N, nullptr, incx, nullptr, incy, nullptr, batch_count)); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; double gpu_time_used, hipblas_error_device; device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_batch_vector dparam(5, 1, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_batch_vector hdata(4, 1, batch_count); host_batch_vector hparam(5, 1, batch_count); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hdata, arg, hipblas_client_alpha_sets_nan, false); for(int b = 0; b < batch_count; b++) { cblas_rotmg(&hdata[b][0], &hdata[b][1], &hdata[b][2], &hdata[b][3], hparam[b]); } constexpr int FLAG_COUNT = 4; const T FLAGS[FLAG_COUNT] = {-1, 0, 1, -2}; for(int i = 0; i < FLAG_COUNT; i++) { if(arg.unit_check || arg.norm_check) { for(int b = 0; b < batch_count; b++) hparam[b][0] = FLAGS[i]; // Test device CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(dparam.transfer_from(hparam)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasRotmBatchedFn(handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dparam.ptr_on_device(), batch_count)); host_batch_vector rx(N, incx, batch_count); host_batch_vector ry(N, incy, batch_count); CHECK_HIP_ERROR(rx.transfer_from(dx)); CHECK_HIP_ERROR(ry.transfer_from(dy)); host_batch_vector cx(N, incx, batch_count); host_batch_vector cy(N, incy, batch_count); cx.copy_from(hx); cy.copy_from(hy); for(int b = 0; b < batch_count; b++) { // CPU BLAS reference data cblas_rotm(N, cx[b], incx, cy[b], incy, hparam[b]); } if(arg.unit_check) { for(int b = 0; b < batch_count; b++) { near_check_general(1, N, abs_incx, cx[b], rx[b], rel_error); near_check_general(1, N, abs_incy, cy[b], ry[b], rel_error); } } if(arg.norm_check) { hipblas_error_device = norm_check_general('F', 1, N, abs_incx, cx, rx, batch_count); hipblas_error_device += norm_check_general('F', 1, N, abs_incy, cy, ry, batch_count); } } } if(arg.timing) { for(int b = 0; b < batch_count; b++) hparam[b][0] = 0; hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(dparam.transfer_from(hparam)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotmBatchedFn(handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dparam.ptr_on_device(), batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotmBatchedModel{}.log_args(std::cout, arg, gpu_time_used, rotm_gflop_count(N, 0), rotm_gbyte_count(N, 0), 0, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rotm_strided_batched.hpp000066400000000000000000000225211434647641600254260ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotmStridedBatchedModel = ArgumentModel; inline void testname_rotm_strided_batched(const Arguments& arg, std::string& name) { hipblasRotmStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotm_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotmStridedBatchedFn = FORTRAN ? hipblasRotmStridedBatched : hipblasRotmStridedBatched; double stride_scale = arg.stride_scale; int N = arg.N; int incx = arg.incx; int incy = arg.incy; hipblasStride stride_param = 5 * stride_scale; int batch_count = arg.batch_count; const T rel_error = std::numeric_limits::epsilon() * 1000; hipblasLocalHandle handle(arg); int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; hipblasStride stride_x = N * abs_incx * stride_scale; hipblasStride stride_y = N * abs_incy * stride_scale; // check to prevent undefined memory allocation error if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR((hipblasRotmStridedBatchedFn(handle, N, nullptr, incx, stride_x, nullptr, incy, stride_y, nullptr, stride_param, batch_count))); return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_device; size_t size_x = N * size_t(abs_incx) + size_t(stride_x) * size_t(batch_count - 1); size_t size_y = N * size_t(abs_incy) + size_t(stride_y) * size_t(batch_count - 1); size_t size_param = 5 + size_t(stride_param) * size_t(batch_count - 1); if(!size_x) size_x = 1; if(!size_y) size_y = 1; device_vector dx(size_x); device_vector dy(size_y); device_vector dparam(size_param); // Initial Data on CPU host_vector hx(size_x); host_vector hy(size_y); host_vector hdata(4 * batch_count); host_vector hparam(size_param); hipblas_init_vector( hx, arg, N, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hy, arg, N, abs_incy, stride_y, batch_count, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hdata, arg, 4, 1, 4, batch_count, hipblas_client_alpha_sets_nan, false); for(int b = 0; b < batch_count; b++) cblas_rotmg(hdata + b * 4, hdata + b * 4 + 1, hdata + b * 4 + 2, hdata + b * 4 + 3, hparam + b * stride_param); constexpr int FLAG_COUNT = 4; const T FLAGS[FLAG_COUNT] = {-1, 0, 1, -2}; for(int i = 0; i < FLAG_COUNT; i++) { if(arg.unit_check || arg.norm_check) { for(int b = 0; b < batch_count; b++) (hparam + b * stride_param)[0] = FLAGS[i]; // Test device CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR( hipMemcpy(dparam, hparam, sizeof(T) * size_param, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR((hipblasRotmStridedBatchedFn(handle, N, dx, incx, stride_x, dy, incy, stride_y, dparam, stride_param, batch_count))); host_vector rx(size_x); host_vector ry(size_y); CHECK_HIP_ERROR(hipMemcpy(rx, dx, sizeof(T) * size_x, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(ry, dy, sizeof(T) * size_y, hipMemcpyDeviceToHost)); host_vector cx = hx; host_vector cy = hy; // CPU BLAS reference data for(int b = 0; b < batch_count; b++) { cblas_rotm( N, cx + b * stride_x, incx, cy + b * stride_y, incy, hparam + b * stride_param); } if(arg.unit_check) { near_check_general(1, N, batch_count, abs_incx, stride_x, cx, rx, rel_error); near_check_general(1, N, batch_count, abs_incy, stride_y, cy, ry, rel_error); } if(arg.norm_check) { hipblas_error_device = norm_check_general('F', 1, N, abs_incx, stride_x, cx, rx, batch_count); hipblas_error_device += norm_check_general('F', 1, N, abs_incy, stride_y, cy, ry, batch_count); } } } if(arg.timing) { for(int b = 0; b < batch_count; b++) (hparam + b * stride_param)[0] = 0; hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dx, hx, sizeof(T) * size_x, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy, sizeof(T) * size_y, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dparam, hparam, sizeof(T) * size_param, hipMemcpyHostToDevice)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = 
get_time_us_sync(stream); CHECK_HIPBLAS_ERROR((hipblasRotmStridedBatchedFn(handle, N, dx, incx, stride_x, dy, incy, stride_y, dparam, stride_param, batch_count))); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotmStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, rotm_gflop_count(N, 0), rotm_gbyte_count(N, 0), 0, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rotmg.hpp000066400000000000000000000110041434647641600223770ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotmgModel = ArgumentModel<>; inline void testname_rotmg(const Arguments& arg, std::string& name) { hipblasRotmgModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotmg(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotmgFn = FORTRAN ? 
hipblasRotmg : hipblasRotmg; double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); host_vector hparams(9); const T rel_error = std::numeric_limits::epsilon() * 1000; // Initial data on CPU hipblas_init_vector(hparams, arg, 9, 1, 0, 1, hipblas_client_alpha_sets_nan, true); host_vector cparams = hparams; host_vector hparams_d = hparams; device_vector dparams(9); CHECK_HIP_ERROR(hipMemcpy(dparams, hparams, 9 * sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasRotmgFn( handle, &hparams[0], &hparams[1], &hparams[2], &hparams[3], &hparams[4])); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasRotmgFn(handle, dparams, dparams + 1, dparams + 2, dparams + 3, dparams + 4)); CHECK_HIP_ERROR(hipMemcpy(hparams_d, dparams, 9 * sizeof(T), hipMemcpyDeviceToHost)); // CPU BLAS cblas_rotmg(&cparams[0], &cparams[1], &cparams[2], &cparams[3], &cparams[4]); if(arg.unit_check) { near_check_general(1, 9, 1, cparams.data(), hparams.data(), rel_error); near_check_general(1, 9, 1, cparams.data(), hparams_d.data(), rel_error); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, 9, 1, cparams, hparams); hipblas_error_device = norm_check_general('F', 1, 9, 1, cparams, hparams_d); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotmgFn( handle, dparams, dparams + 1, dparams + 2, dparams + 3, dparams + 4)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotmgModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rotmg_batched.hpp000066400000000000000000000216571434647641600240700ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotmgBatchedModel = ArgumentModel; inline void testname_rotmg_batched(const Arguments& arg, std::string& name) { hipblasRotmgBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotmg_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotmgBatchedFn = FORTRAN ? hipblasRotmgBatched : hipblasRotmgBatched; int batch_count = arg.batch_count; T rel_error = std::numeric_limits::epsilon() * 1000; // check to prevent undefined memory allocation error if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } else if(batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU // host data for hipBLAS host test host_batch_vector hd1(1, 1, batch_count); host_batch_vector hd2(1, 1, batch_count); host_batch_vector hx1(1, 1, batch_count); host_batch_vector hy1(1, 1, batch_count); host_batch_vector hparams(5, 1, batch_count); // host data for CBLAS test host_batch_vector cd1(1, 1, batch_count); host_batch_vector cd2(1, 1, batch_count); host_batch_vector cx1(1, 1, batch_count); host_batch_vector cy1(1, 1, batch_count); host_batch_vector cparams(5, 1, batch_count); // host data for hipBLAS device test host_batch_vector hd1_d(1, 1, batch_count); host_batch_vector hd2_d(1, 1, batch_count); host_batch_vector hx1_d(1, 1, batch_count); host_batch_vector hy1_d(1, 1, batch_count); host_batch_vector hparams_d(5, 1, batch_count); // device data for hipBLAS device test device_batch_vector dd1(1, 1, batch_count); device_batch_vector dd2(1, 1, batch_count); device_batch_vector dx1(1, 1, batch_count); device_batch_vector dy1(1, 1, batch_count); device_batch_vector dparams(5, 1, batch_count); hipblas_init_vector(hd1, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hd2, arg, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hx1, arg, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hy1, arg, hipblas_client_alpha_sets_nan, false); hipblas_init_vector(hparams, arg, hipblas_client_alpha_sets_nan, false); cd1.copy_from(hd1); cd2.copy_from(hd2); cx1.copy_from(hx1); cy1.copy_from(hy1); cparams.copy_from(hparams); hd1_d.copy_from(hd1); hd2_d.copy_from(hd2); hx1_d.copy_from(hx1); hy1_d.copy_from(hy1); hparams_d.copy_from(hparams); CHECK_HIP_ERROR(dd1.transfer_from(hd1)); CHECK_HIP_ERROR(dd2.transfer_from(hd2)); CHECK_HIP_ERROR(dx1.transfer_from(hx1)); CHECK_HIP_ERROR(dy1.transfer_from(hy1)); CHECK_HIP_ERROR(dparams.transfer_from(hparams)); if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasRotmgBatchedFn(handle, hd1, hd2, hx1, hy1, hparams, batch_count)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasRotmgBatchedFn(handle, dd1.ptr_on_device(), dd2.ptr_on_device(), dx1.ptr_on_device(), dy1.ptr_on_device(), dparams.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hd1_d.transfer_from(dd1)); CHECK_HIP_ERROR(hd2_d.transfer_from(dd2)); CHECK_HIP_ERROR(hx1_d.transfer_from(dx1)); CHECK_HIP_ERROR(hy1_d.transfer_from(dy1)); CHECK_HIP_ERROR(hparams_d.transfer_from(dparams)); // CBLAS for(int b = 0; b < batch_count; b++) { cblas_rotmg(cd1[b], cd2[b], 
cx1[b], cy1[b], cparams[b]); } if(arg.unit_check) { for(int b = 0; b < batch_count; b++) { near_check_general(1, 1, 1, cd1[b], hd1[b], rel_error); near_check_general(1, 1, 1, cd2[b], hd2[b], rel_error); near_check_general(1, 1, 1, cx1[b], hx1[b], rel_error); near_check_general(1, 1, 1, cy1[b], hy1[b], rel_error); near_check_general(1, 5, 1, cparams[b], hparams[b], rel_error); near_check_general(1, 1, 1, cd1[b], hd1_d[b], rel_error); near_check_general(1, 1, 1, cd2[b], hd2_d[b], rel_error); near_check_general(1, 1, 1, cx1[b], hx1_d[b], rel_error); near_check_general(1, 1, 1, cy1[b], hy1_d[b], rel_error); near_check_general(1, 5, 1, cparams[b], hparams_d[b], rel_error); } } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, 1, 1, cd1, hd1, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, cd2, hd2, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, cx1, hx1, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, cy1, hy1, batch_count); hipblas_error_host += norm_check_general('F', 1, 5, 1, cparams, hparams, batch_count); hipblas_error_device = norm_check_general('F', 1, 1, 1, cd1, hd1_d, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, cd2, hd2_d, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, cx1, hx1_d, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, cy1, hy1_d, batch_count); hipblas_error_device += norm_check_general('F', 1, 5, 1, cparams, hparams_d, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotmgBatchedFn(handle, dd1.ptr_on_device(), dd2.ptr_on_device(), dx1.ptr_on_device(), dy1.ptr_on_device(), dparams.ptr_on_device(), batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotmgBatchedModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_rotmg_strided_batched.hpp000066400000000000000000000266321434647641600256040ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasRotmgStridedBatchedModel = ArgumentModel; inline void testname_rotmg_strided_batched(const Arguments& arg, std::string& name) { hipblasRotmgStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_rotmg_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasRotmgStridedBatchedFn = FORTRAN ? hipblasRotmgStridedBatched : hipblasRotmgStridedBatched; int batch_count = arg.batch_count; double stride_scale = arg.stride_scale; hipblasStride stride_d1 = stride_scale; hipblasStride stride_d2 = stride_scale; hipblasStride stride_x1 = stride_scale; hipblasStride stride_y1 = stride_scale; hipblasStride stride_param = 5 * stride_scale; const T rel_error = std::numeric_limits::epsilon() * 1000; // check to prevent undefined memory allocation error if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } else if(batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); size_t size_d1 = batch_count * stride_d1; size_t size_d2 = batch_count * stride_d2; size_t size_x1 = batch_count * stride_x1; size_t size_y1 = batch_count * stride_y1; size_t size_param = batch_count * stride_param; // Initial Data on CPU // host data for hipBLAS host test host_vector hd1(size_d1); host_vector hd2(size_d2); host_vector hx1(size_x1); host_vector hy1(size_y1); host_vector hparams(size_param); hipblas_init_vector( hparams, arg, 5, 1, stride_param, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hd1, arg, 1, 1, stride_d1, batch_count, hipblas_client_alpha_sets_nan, false); hipblas_init_vector( hd2, arg, 1, 1, stride_d2, batch_count, hipblas_client_alpha_sets_nan, false); hipblas_init_vector( hx1, arg, 1, 1, stride_x1, batch_count, hipblas_client_alpha_sets_nan, false); hipblas_init_vector( hy1, arg, 1, 1, stride_y1, batch_count, hipblas_client_alpha_sets_nan, false); // host data for CBLAS test host_vector cparams = hparams; host_vector cd1 = hd1; host_vector cd2 = hd2; host_vector cx1 = hx1; host_vector cy1 = hy1; // host data for hipBLAS device test host_vector hd1_d(size_d1); host_vector hd2_d(size_d2); host_vector hx1_d(size_x1); host_vector hy1_d(size_y1); host_vector hparams_d(size_param); // device data for hipBLAS device test device_vector dd1(size_d1); device_vector dd2(size_d2); device_vector dx1(size_x1); device_vector dy1(size_y1); device_vector dparams(size_param); CHECK_HIP_ERROR(hipMemcpy(dd1, hd1, sizeof(T) * size_d1, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dd2, hd2, sizeof(T) * size_d2, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx1, hx1, sizeof(T) * size_x1, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy1, hy1, sizeof(T) * size_y1, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dparams, hparams, sizeof(T) * size_param, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); 
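        // Editor's note: with HIPBLAS_POINTER_MODE_HOST the rotmg scalars
        // (d1, d2, x1, y1) and the param arrays are read from and written back
        // to host memory, so the host-side arrays are passed directly below;
        // the second call then switches to HIPBLAS_POINTER_MODE_DEVICE and
        // passes the corresponding device allocations instead.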
CHECK_HIPBLAS_ERROR(hipblasRotmgStridedBatchedFn(handle, hd1, stride_d1, hd2, stride_d2, hx1, stride_x1, hy1, stride_y1, hparams, stride_param, batch_count)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasRotmgStridedBatchedFn(handle, dd1, stride_d1, dd2, stride_d2, dx1, stride_x1, dy1, stride_y1, dparams, stride_param, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hd1_d, dd1, sizeof(T) * size_d1, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hd2_d, dd2, sizeof(T) * size_d2, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hx1_d, dx1, sizeof(T) * size_x1, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy1_d, dy1, sizeof(T) * size_y1, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR( hipMemcpy(hparams_d, dparams, sizeof(T) * size_param, hipMemcpyDeviceToHost)); for(int b = 0; b < batch_count; b++) { cblas_rotmg(cd1 + b * stride_d1, cd2 + b * stride_d2, cx1 + b * stride_x1, cy1 + b * stride_y1, cparams + b * stride_param); } if(arg.unit_check) { near_check_general(1, 1, batch_count, 1, stride_d1, cd1, hd1, rel_error); near_check_general(1, 1, batch_count, 1, stride_d2, cd2, hd2, rel_error); near_check_general(1, 1, batch_count, 1, stride_x1, cx1, hx1, rel_error); near_check_general(1, 1, batch_count, 1, stride_y1, cy1, hy1, rel_error); near_check_general(1, 5, batch_count, 1, stride_param, cparams, hparams, rel_error); near_check_general(1, 1, batch_count, 1, stride_d1, cd1, hd1_d, rel_error); near_check_general(1, 1, batch_count, 1, stride_d2, cd2, hd2_d, rel_error); near_check_general(1, 1, batch_count, 1, stride_x1, cx1, hx1_d, rel_error); near_check_general(1, 1, batch_count, 1, stride_y1, cy1, hy1_d, rel_error); near_check_general( 1, 5, batch_count, 1, stride_param, cparams, hparams_d, rel_error); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, 1, 1, stride_d1, cd1, hd1, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, stride_d2, cd2, hd2, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, stride_x1, cx1, hx1, batch_count); hipblas_error_host += norm_check_general('F', 1, 1, 1, stride_y1, cy1, hy1, batch_count); hipblas_error_host += norm_check_general('F', 1, 5, 1, stride_param, cparams, hparams, batch_count); hipblas_error_device = norm_check_general('F', 1, 1, 1, stride_d1, cd1, hd1_d, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, stride_d2, cd2, hd2_d, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, stride_x1, cx1, hx1_d, batch_count); hipblas_error_device += norm_check_general('F', 1, 1, 1, stride_y1, cy1, hy1_d, batch_count); hipblas_error_device += norm_check_general( 'F', 1, 5, 1, stride_param, cparams, hparams_d, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasRotmgStridedBatchedFn(handle, dd1, stride_d1, dd2, stride_d2, dx1, stride_x1, dy1, stride_y1, dparams, stride_param, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasRotmgStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, ArgumentLogging::NA_value, hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
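/* Editor's note (illustrative addition, not original repository code): the
 * rotm/rotmg tests above sweep the param flag values {-1, 0, 1, -2}. The
 * host-side reference below spells out how that 5-element param array, as
 * produced by rotmg, is applied by rotm under the standard BLAS definition of
 * the modified Givens rotation; unit increments are assumed for brevity and
 * the helper name is ours. */
#include <cstddef>

template <typename T>
void apply_rotm_reference(std::size_t n, T* x, T* y, const T* param)
{
    const T flag = param[0];
    T h11 = 1, h12 = 0, h21 = 0, h22 = 1; // defaults give the identity (flag == -2)
    if(flag == T(-1)) // full 2x2 matrix H supplied in param[1..4]
    {
        h11 = param[1]; h21 = param[2]; h12 = param[3]; h22 = param[4];
    }
    else if(flag == T(0)) // unit diagonal, off-diagonals taken from param
    {
        h21 = param[2]; h12 = param[3];
    }
    else if(flag == T(1)) // diagonal from param, off-diagonals fixed to +/-1
    {
        h11 = param[1]; h22 = param[4]; h12 = 1; h21 = -1;
    }
    else // flag == -2: H is the identity, the vectors are unchanged
    {
        return;
    }

    for(std::size_t i = 0; i < n; ++i)
    {
        const T xi = x[i], yi = y[i];
        x[i] = h11 * xi + h12 * yi;
        y[i] = h21 * xi + h22 * yi;
    }
}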
hipBLAS-rocm-5.5.1/clients/include/testing_sbmv.hpp000066400000000000000000000163421434647641600222300ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSbmvModel = ArgumentModel; inline void testname_sbmv(const Arguments& arg, std::string& name) { hipblasSbmvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_sbmv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSbmvFn = FORTRAN ? hipblasSbmv : hipblasSbmv; int M = arg.M; int K = arg.K; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t x_size = size_t(M) * abs_incx; size_t y_size = size_t(M) * abs_incy; size_t A_size = size_t(lda) * M; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || lda < 1 || !incx || !incy; if(invalid_size || !M) { hipblasStatus_t actual = hipblasSbmvFn( handle, uplo, M, K, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hy(y_size); host_vector hy_cpu(y_size); host_vector hy_host(y_size); host_vector hy_device(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix(hA, arg, M, M, lda, 0, 1, hipblas_client_alpha_sets_nan, true, false); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, M, abs_incy, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasSbmvFn(handle, uplo, M, K, &h_alpha, dA, lda, dx, incx, &h_beta, dy, incy)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasSbmvFn(handle, uplo, M, K, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_sbmv( uplo, M, K, h_alpha, hA.data(), lda, hx.data(), incx, h_beta, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, abs_incy, hy_cpu, hy_host); unit_check_general(1, M, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_host); hipblas_error_device = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_device); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasSbmvFn(handle, uplo, M, K, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSbmvModel{}.log_args(std::cout, arg, gpu_time_used, sbmv_gflop_count(M, K), sbmv_gbyte_count(M, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
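/* Editor's note (illustrative addition, not original repository code): sbmv
 * operates on a symmetric band matrix with K sub/super-diagonals packed into a
 * (K+1) x M column-major band array, which is why the argument check above
 * rejects lda < K + 1. The helper below sketches the conventional BLAS band
 * indexing for the upper-triangular case (0-based); the function name is ours. */
#include <cstddef>

inline std::size_t sbmv_band_index_upper(int k, int lda, int i, int j)
{
    // Valid for j >= i and j - i <= k: element A(i,j) of the full matrix is
    // stored at row (k + i - j) of column j in the packed band array.
    return std::size_t(k + i - j) + std::size_t(j) * std::size_t(lda);
}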
hipBLAS-rocm-5.5.1/clients/include/testing_sbmv_batched.hpp000066400000000000000000000234011434647641600236740ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSbmvBatchedModel = ArgumentModel; inline void testname_sbmv_batched(const Arguments& arg, std::string& name) { hipblasSbmvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_sbmv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSbmvBatchedFn = FORTRAN ? hipblasSbmvBatched : hipblasSbmvBatched; int M = arg.M; int K = arg.K; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; int abs_incy = incy >= 0 ? incy : -incy; size_t A_size = size_t(lda) * M; int batch_count = arg.batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || lda < 1 || !incx || !incy || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasSbmvBatchedFn(handle, uplo, M, K, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hy(M, incy, batch_count); host_batch_vector hy_cpu(M, incy, batch_count); host_batch_vector hy_host(M, incy, batch_count); host_batch_vector hy_device(M, incy, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(M, incx, batch_count); device_batch_vector dy(M, incy, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, hipblas_client_beta_sets_nan); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSbmvBatchedFn(handle, uplo, M, K, &h_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, &h_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSbmvBatchedFn(handle, uplo, M, K, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_sbmv(uplo, M, K, h_alpha, hA[b], lda, hx[b], incx, h_beta, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, M, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(dy.transfer_from(hy)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasSbmvBatchedFn(handle, uplo, M, K, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
hipblasSbmvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, sbmv_gflop_count(M, K), sbmv_gbyte_count(M, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_sbmv_strided_batched.hpp000066400000000000000000000304011434647641600254100ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSbmvStridedBatchedModel = ArgumentModel; inline void testname_sbmv_strided_batched(const Arguments& arg, std::string& name) { hipblasSbmvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_sbmv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSbmvStridedBatchedFn = FORTRAN ? hipblasSbmvStridedBatched : hipblasSbmvStridedBatched; int M = arg.M; int K = arg.K; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stride_A = size_t(lda) * M * stride_scale; hipblasStride stride_x = size_t(M) * abs_incx * stride_scale; hipblasStride stride_y = size_t(M) * abs_incy * stride_scale; size_t A_size = stride_A * batch_count; size_t X_size = stride_x * batch_count; size_t Y_size = stride_y * batch_count; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || lda < 1 || !incx || !incy || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasSbmvStridedBatchedFn(handle, uplo, M, K, nullptr, nullptr, lda, stride_A, nullptr, incx, stride_x, nullptr, nullptr, incy, stride_y, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix( hA, arg, M, M, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, M, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, M, abs_incy, stride_y, batch_count, hipblas_client_beta_sets_nan); hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSbmvStridedBatchedFn(handle, uplo, M, K, &h_alpha, dA, lda, stride_A, dx, incx, stride_x, &h_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSbmvStridedBatchedFn(handle, uplo, M, K, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_sbmv(uplo, M, K, h_alpha, hA.data() + b * stride_A, lda, hx.data() + b * stride_x, incx, h_beta, hy_cpu.data() + b * stride_y, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incy, stride_y, hy_cpu, hy_host); unit_check_general(1, M, batch_count, abs_incy, stride_y, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, M, abs_incy, stride_y, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, M, abs_incy, stride_y, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasSbmvStridedBatchedFn(handle, uplo, M, K, d_alpha, dA, lda, stride_A, 
dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSbmvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, sbmv_gflop_count(M, K), sbmv_gbyte_count(M, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_scal.hpp000066400000000000000000000116741434647641600222060ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasScalModel = ArgumentModel; inline void testname_scal(const Arguments& arg, std::string& name) { hipblasScalModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_scal(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasScalFn = FORTRAN ? hipblasScal : hipblasScal; int N = arg.N; int incx = arg.incx; int unit_check = arg.unit_check; int timing = arg.timing; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || incx <= 0) { CHECK_HIPBLAS_ERROR(hipblasScalFn(handle, N, nullptr, nullptr, incx)); return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = size_t(N) * incx; U alpha = arg.get_alpha(); // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hz(sizeX); device_vector dx(sizeX); double gpu_time_used, cpu_time_used; double hipblas_error = 0.0; // Initial Data on CPU hipblas_init_vector(hx, arg, N, incx, 0, 1, hipblas_client_alpha_sets_nan, true); // copy vector is easy in STL; hz = hx: save a copy in hz which will be output of CPU BLAS hz = hx; // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasScalFn(handle, N, &alpha, dx, incx)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx.data(), dx, sizeof(T) * sizeX, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_scal(N, alpha, hz.data(), incx); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, incx, hz.data(), hx.data()); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, N, incx, hz.data(), hx.data()); } } // end of if unit check // BLAS_1_RESULT_PRINT if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasScalFn(handle, N, &alpha, dx, incx)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasScalModel{}.log_args(std::cout, arg, gpu_time_used, scal_gflop_count(N), scal_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_scal_batched.hpp000066400000000000000000000124101434647641600236450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasScalBatchedModel = ArgumentModel; inline void testname_scal_batched(const Arguments& arg, std::string& name) { hipblasScalBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_scal_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasScalBatchedFn = FORTRAN ? hipblasScalBatched : hipblasScalBatched; int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; int unit_check = arg.unit_check; int norm_check = arg.norm_check; int timing = arg.timing; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || incx <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasScalBatchedFn(handle, N, nullptr, nullptr, incx, batch_count)); return HIPBLAS_STATUS_SUCCESS; } size_t sizeX = size_t(N) * incx; U alpha = arg.get_alpha(); double gpu_time_used = 0.0, cpu_time_used = 0.0; double hipblas_error = 0.0; // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_batch_vector hz(N, incx, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dz(N, incx, batch_count); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dz.memcheck()); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); hz.copy_from(hx); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dz.transfer_from(hx)); if(unit_check || norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR( hipblasScalBatchedFn(handle, N, &alpha, dx.ptr_on_device(), incx, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hx.transfer_from(dx)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_scal(N, alpha, hz[b], incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(unit_check) { unit_check_general(1, N, batch_count, incx, hz, hx); } if(norm_check) { hipblas_error = norm_check_general('F', 1, N, incx, hz, hx, batch_count); } } // end of if unit check if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasScalBatchedFn(handle, N, &alpha, dx.ptr_on_device(), incx, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasScalBatchedModel{}.log_args(std::cout, arg, gpu_time_used, scal_gflop_count(N), scal_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_scal_batched_ex.hpp000066400000000000000000000233461434647641600243530ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasScalBatchedExModel = ArgumentModel; inline void testname_scal_batched_ex(const Arguments& arg, std::string& name) { hipblasScalBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_scal_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasScalBatchedExFn = FORTRAN ? hipblasScalBatchedExFortran : hipblasScalBatchedEx; int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; int unit_check = arg.unit_check; int timing = arg.timing; int norm_check = arg.norm_check; Ta h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || incx <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasScalBatchedExFn( handle, N, nullptr, alphaType, nullptr, xType, incx, batch_count, executionType)); return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_batch_vector hx_host(N, incx, batch_count); host_batch_vector hx_device(N, incx, batch_count); host_batch_vector hx_cpu(N, incx, batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dx.memcheck()); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx_host, arg, hipblas_client_alpha_sets_nan, true); hx_device.copy_from(hx_host); hx_cpu.copy_from(hx_host); CHECK_HIP_ERROR(dx.transfer_from(hx_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(Ta), hipMemcpyHostToDevice)); if(unit_check || norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasScalBatchedExFn(handle, N, &h_alpha, alphaType, dx.ptr_on_device(), xType, incx, batch_count, executionType)); CHECK_HIP_ERROR(hx_host.transfer_from(dx)); CHECK_HIP_ERROR(dx.transfer_from(hx_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasScalBatchedExFn(handle, N, d_alpha, alphaType, dx.ptr_on_device(), xType, incx, batch_count, executionType)); CHECK_HIP_ERROR(hx_device.transfer_from(dx)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_scal(N, h_alpha, hx_cpu[b], incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(unit_check) { unit_check_general(1, N, batch_count, incx, hx_cpu, hx_host); unit_check_general(1, N, batch_count, incx, hx_cpu, hx_device); } if(norm_check) { hipblas_error_host = norm_check_general('F', 1, N, incx, hx_cpu, hx_host, batch_count); hipblas_error_host = norm_check_general('F', 1, N, incx, hx_cpu, hx_device, batch_count); } } // end of if unit check if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasScalBatchedExFn(handle, N, d_alpha, alphaType, dx.ptr_on_device(), xType, incx, batch_count, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasScalBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, scal_gflop_count(N), scal_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_scal_batched_ex(const Arguments& arg) { hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_scal_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_scal_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_scal_batched_ex_template(arg); } else 
if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_scal_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_scal_batched_ex_template(arg); } else if(alphaType == HIPBLAS_C_32F && xType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_scal_batched_ex_template(arg); } else if(alphaType == HIPBLAS_C_64F && xType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_scal_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_scal_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_scal_batched_ex_template( arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_scal_ex.hpp000066400000000000000000000203441434647641600226740ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasScalExModel = ArgumentModel; inline void testname_scal_ex(const Arguments& arg, std::string& name) { hipblasScalExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_scal_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasScalExFn = FORTRAN ? hipblasScalExFortran : hipblasScalEx; int N = arg.N; int incx = arg.incx; int unit_check = arg.unit_check; int timing = arg.timing; int norm_check = arg.norm_check; size_t sizeX = size_t(N) * incx; Ta h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || incx <= 0) { CHECK_HIPBLAS_ERROR( hipblasScalExFn(handle, N, nullptr, alphaType, nullptr, xType, incx, executionType)); return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx_host(sizeX); host_vector hx_device(sizeX); host_vector hx_cpu(sizeX); device_vector dx(sizeX); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector(hx_host, arg, N, incx, 0, 1, hipblas_client_alpha_sets_nan, true); // copy vector is easy in STL; hz = hx: save a copy in hz which will be output of CPU BLAS hx_device = hx_cpu = hx_host; // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx_host, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(Ta), hipMemcpyHostToDevice)); if(unit_check || norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasScalExFn(handle, N, &h_alpha, alphaType, dx, xType, incx, executionType)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx_host, dx, sizeof(Tx) * sizeX, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dx, hx_device, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasScalExFn(handle, N, d_alpha, alphaType, dx, xType, incx, executionType)); CHECK_HIP_ERROR(hipMemcpy(hx_device, dx, sizeof(Tx) * sizeX, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_scal(N, h_alpha, hx_cpu, incx); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(unit_check) { unit_check_general(1, N, incx, hx_cpu, hx_host); unit_check_general(1, N, incx, hx_cpu, hx_device); } if(norm_check) { hipblas_error_host = norm_check_general('F', 1, N, incx, hx_cpu, hx_host); hipblas_error_host = norm_check_general('F', 1, N, incx, hx_cpu, hx_device); } } // end of if unit check if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasScalExFn(handle, N, d_alpha, alphaType, dx, xType, incx, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasScalExModel{}.log_args(std::cout, arg, gpu_time_used, scal_gflop_count(N), scal_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline hipblasStatus_t testing_scal_ex(const Arguments& arg) { hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_scal_ex_template(arg); } else if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_scal_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_scal_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F 
&& xType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_scal_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_scal_ex_template(arg); } else if(alphaType == HIPBLAS_C_32F && xType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_scal_ex_template(arg); } else if(alphaType == HIPBLAS_C_64F && xType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_scal_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_scal_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_scal_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_scal_strided_batched.hpp000066400000000000000000000127021434647641600253670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasScalStridedBatchedModel = ArgumentModel; inline void testname_scal_strided_batched(const Arguments& arg, std::string& name) { hipblasScalStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_scal_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasScalStridedBatchedFn = FORTRAN ? hipblasScalStridedBatched : hipblasScalStridedBatched; int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int unit_check = arg.unit_check; int timing = arg.timing; hipblasStride stridex = size_t(N) * incx * stride_scale; size_t sizeX = stridex * batch_count; U alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || incx <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR( hipblasScalStridedBatchedFn(handle, N, nullptr, nullptr, incx, stridex, batch_count)); return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hz(sizeX); device_vector dx(sizeX); double gpu_time_used = 0.0, cpu_time_used = 0.0; double hipblas_error = 0.0; // Initial Data on CPU hipblas_init_vector( hx, arg, N, incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); // copy vector is easy in STL; hz = hx: save a copy in hz which will be output of CPU BLAS hz = hx; // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); if(arg.unit_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR( hipblasScalStridedBatchedFn(handle, N, &alpha, dx, incx, stridex, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx.data(), dx, sizeof(T) * sizeX, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_scal(N, alpha, hz.data() + b * stridex, incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, N, batch_count, incx, stridex, hz, hx); } } // end of if unit check if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasScalStridedBatchedFn(handle, N, &alpha, dx, incx, stridex, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasScalStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, scal_gflop_count(N), scal_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_scal_strided_batched_ex.hpp000066400000000000000000000247751434647641600261000ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasScalStridedBatchedExModel = ArgumentModel; inline void testname_scal_strided_batched_ex_template(const Arguments& arg, std::string& name) { hipblasScalStridedBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_scal_strided_batched_ex_template(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasScalStridedBatchedExFn = FORTRAN ? hipblasScalStridedBatchedExFortran : hipblasScalStridedBatchedEx; int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int unit_check = arg.unit_check; int timing = arg.timing; int norm_check = arg.norm_check; hipblasStride stridex = size_t(N) * incx * stride_scale; size_t sizeX = stridex * batch_count; Ta h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || incx <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasScalStridedBatchedExFn(handle, N, nullptr, alphaType, nullptr, xType, incx, stride_scale, batch_count, executionType)); return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_vector hx_host(sizeX); host_vector hx_device(sizeX); host_vector hx_cpu(sizeX); device_vector dx(sizeX); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_vector( hx_host, arg, N, incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); // copy vector is easy in STL; hz = hx: save a copy in hz which will be output of CPU BLAS hx_device = hx_cpu = hx_host; // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx_host, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(Ta), hipMemcpyHostToDevice)); if(unit_check || norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasScalStridedBatchedExFn( handle, N, &h_alpha, alphaType, dx, xType, incx, stridex, batch_count, executionType)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx_host, dx, sizeof(Tx) * sizeX, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dx, hx_device, sizeof(Tx) * sizeX, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasScalStridedBatchedExFn( handle, N, d_alpha, alphaType, dx, xType, incx, stridex, batch_count, executionType)); CHECK_HIP_ERROR(hipMemcpy(hx_device, dx, sizeof(Tx) * sizeX, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_scal(N, h_alpha, hx_cpu + b * stridex, incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be 
interchanged their order if(unit_check) { unit_check_general(1, N, batch_count, incx, stridex, hx_cpu, hx_host); unit_check_general(1, N, batch_count, incx, stridex, hx_cpu, hx_device); } if(norm_check) { hipblas_error_host = norm_check_general('F', 1, N, incx, stridex, hx_cpu, hx_host, batch_count); hipblas_error_device = norm_check_general('F', 1, N, incx, stridex, hx_cpu, hx_device, batch_count); } } // end of if unit check if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasScalStridedBatchedExFn(handle, N, d_alpha, alphaType, dx, xType, incx, stridex, batch_count, executionType)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasScalStridedBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, scal_gflop_count(N), scal_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } inline void testname_scal_strided_batched_ex(const Arguments& arg, std::string& name) { ArgumentModel{}.test_name(arg, name); } inline hipblasStatus_t testing_scal_strided_batched_ex(const Arguments& arg) { hipblasDatatype_t alphaType = arg.a_type; hipblasDatatype_t xType = arg.b_type; hipblasDatatype_t executionType = arg.compute_type; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_16F) { status = testing_scal_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_16F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_scal_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_16F && executionType == HIPBLAS_R_32F) { status = testing_scal_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_R_32F && executionType == HIPBLAS_R_32F) { status = testing_scal_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_R_64F && executionType == HIPBLAS_R_64F) { status = testing_scal_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_C_32F && xType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_scal_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_C_64F && xType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_scal_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_32F && xType == HIPBLAS_C_32F && executionType == HIPBLAS_C_32F) { status = testing_scal_strided_batched_ex_template(arg); } else if(alphaType == HIPBLAS_R_64F && xType == HIPBLAS_C_64F && executionType == HIPBLAS_C_64F) { status = testing_scal_strided_batched_ex_template(arg); } else { status = HIPBLAS_STATUS_NOT_SUPPORTED; } return status; } hipBLAS-rocm-5.5.1/clients/include/testing_set_get_atomics_mode.hpp000066400000000000000000000052731434647641600254370ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ inline void testname_set_get_atomics_mode(const Arguments& arg, std::string& name) { ArgumentModel<>{}.test_name(arg, name); } inline hipblasStatus_t testing_set_get_atomics_mode(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSetAtomicsModeFn = FORTRAN ? hipblasSetAtomicsModeFortran : hipblasSetAtomicsMode; auto hipblasGetAtomicsModeFn = FORTRAN ? hipblasGetAtomicsModeFortran : hipblasGetAtomicsMode; hipblasAtomicsMode_t mode; hipblasLocalHandle handle(arg); // Not checking default as rocBLAS defaults to allowed // and cuBLAS defaults to not allowed. // CHECK_HIPBLAS_ERROR(hipblasGetAtomicsModeFn(handle, &mode)); // EXPECT_EQ(HIPBLAS_ATOMICS_ALLOWED, mode); // Make sure set()/get() functions work CHECK_HIPBLAS_ERROR(hipblasSetAtomicsModeFn(handle, HIPBLAS_ATOMICS_NOT_ALLOWED)); CHECK_HIPBLAS_ERROR(hipblasGetAtomicsModeFn(handle, &mode)); EXPECT_EQ(HIPBLAS_ATOMICS_NOT_ALLOWED, mode); CHECK_HIPBLAS_ERROR(hipblasSetAtomicsModeFn(handle, HIPBLAS_ATOMICS_ALLOWED)); CHECK_HIPBLAS_ERROR(hipblasGetAtomicsModeFn(handle, &mode)); EXPECT_EQ(HIPBLAS_ATOMICS_ALLOWED, mode); return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_set_get_matrix.hpp000066400000000000000000000125371434647641600243010ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSetGetMatrixModel = ArgumentModel; inline void testname_set_get_matrix(const Arguments& arg, std::string& name) { hipblasSetGetMatrixModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_set_get_matrix(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSetMatrixFn = FORTRAN ? hipblasSetMatrixFortran : hipblasSetMatrix; auto hipblasGetMatrixFn = FORTRAN ? hipblasGetMatrixFortran : hipblasGetMatrix; int rows = arg.rows; int cols = arg.cols; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(rows < 0 || cols < 0 || lda <= 0 || ldb <= 0 || ldc <= 0) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector ha(cols * lda); host_vector hb(cols * ldb); host_vector hb_ref(cols * ldb); host_vector hc(cols * ldc); device_vector dc(cols * ldc); double hipblas_error = 0.0, gpu_time_used = 0.0; hipblasLocalHandle handle(arg); // Initial Data on CPU srand(1); hipblas_init(ha, rows, cols, lda); hipblas_init(hb, rows, cols, ldb); hb_ref = hb; for(int i = 0; i < cols * ldc; i++) { hc[i] = 100 + i; }; CHECK_HIP_ERROR(hipMemcpy(dc, hc.data(), sizeof(T) * ldc * cols, hipMemcpyHostToDevice)); for(int i = 0; i < cols * ldc; i++) { hc[i] = 99.0; }; /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetMatrixFn(rows, cols, sizeof(T), (void*)ha, lda, (void*)dc, ldc)); CHECK_HIPBLAS_ERROR(hipblasGetMatrixFn(rows, cols, sizeof(T), (void*)dc, ldc, (void*)hb, ldb)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU BLAS =================================================================== */ // reference calculation for(int i1 = 0; i1 < rows; i1++) { for(int i2 = 0; i2 < cols; i2++) { hb_ref[i1 + i2 * ldb] = ha[i1 + i2 * lda]; } } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(rows, cols, ldb, hb, hb_ref); } if(arg.norm_check) { hipblas_error = norm_check_general('F', rows, cols, ldb, hb, hb_ref); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasSetMatrixFn(rows, cols, sizeof(T), (void*)ha, lda, (void*)dc, ldc)); CHECK_HIPBLAS_ERROR( hipblasGetMatrixFn(rows, cols, sizeof(T), (void*)dc, ldc, (void*)hb, ldb)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSetGetMatrixModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, set_get_matrix_gbyte_count(rows, cols), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } 
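For reference, the host/device matrix copy that this test validates can be reproduced outside the harness with the two public entry points it exercises. The sketch below is illustrative only, assuming the hipBLAS and HIP runtime headers are available; the function name set_get_matrix_example, the element type, and the rows/cols/lda/ldb/ldc values are made-up examples, and only minimal status checking is shown.

#include <hip/hip_runtime.h>
#include <hipblas.h>
#include <vector>

// Illustrative sketch only: sizes and the function name are assumptions.
int set_get_matrix_example()
{
    const int rows = 4, cols = 3, lda = 5, ldb = 6, ldc = 7;

    std::vector<float> ha(size_t(lda) * cols, 1.0f); // host source, leading dimension lda
    std::vector<float> hb(size_t(ldb) * cols, 0.0f); // host destination, leading dimension ldb

    float* dc = nullptr; // device buffer, leading dimension ldc
    if(hipMalloc(&dc, size_t(ldc) * cols * sizeof(float)) != hipSuccess)
        return 1;

    // Copy a rows x cols block host -> device and back; each buffer may use a
    // different leading dimension, which is what the ldb/ldc reference loop above checks.
    hipblasStatus_t status
        = hipblasSetMatrix(rows, cols, sizeof(float), ha.data(), lda, dc, ldc);
    if(status == HIPBLAS_STATUS_SUCCESS)
        status = hipblasGetMatrix(rows, cols, sizeof(float), dc, ldc, hb.data(), ldb);

    hipFree(dc);
    return status == HIPBLAS_STATUS_SUCCESS ? 0 : 1;
}

Note that the synchronous hipblasSetMatrix/hipblasGetMatrix calls take no handle, while the *Async variants exercised in the next test add a hipStream_t parameter and require an explicit stream synchronization before the host buffer is read.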
hipBLAS-rocm-5.5.1/clients/include/testing_set_get_matrix_async.hpp000066400000000000000000000131601434647641600254670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSetGetMatrixAsyncModel = ArgumentModel; inline void testname_set_get_matrix_async(const Arguments& arg, std::string& name) { hipblasSetGetMatrixAsyncModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_set_get_matrix_async(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSetMatrixAsyncFn = FORTRAN ? hipblasSetMatrixAsyncFortran : hipblasSetMatrixAsync; auto hipblasGetMatrixAsyncFn = FORTRAN ? hipblasGetMatrixAsyncFortran : hipblasGetMatrixAsync; int rows = arg.rows; int cols = arg.cols; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(rows < 0 || cols < 0 || lda <= 0 || ldb <= 0 || ldc <= 0) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector ha(cols * lda); host_vector hb(cols * ldb); host_vector hb_ref(cols * ldb); host_vector hc(cols * ldc); device_vector dc(cols * ldc); double hipblas_error = 0.0, gpu_time_used = 0.0; hipblasLocalHandle handle(arg); hipStream_t stream; hipblasGetStream(handle, &stream); // Initial Data on CPU srand(1); hipblas_init(ha, rows, cols, lda); hipblas_init(hb, rows, cols, ldb); hb_ref = hb; for(int i = 0; i < cols * ldc; i++) { hc[i] = 100 + i; }; CHECK_HIP_ERROR(hipMemcpy(dc, hc.data(), sizeof(T) * ldc * cols, hipMemcpyHostToDevice)); for(int i = 0; i < cols * ldc; i++) { hc[i] = 99.0; }; /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR( hipblasSetMatrixAsyncFn(rows, cols, sizeof(T), (void*)ha, lda, (void*)dc, ldc, stream)); CHECK_HIPBLAS_ERROR( hipblasGetMatrixAsyncFn(rows, cols, sizeof(T), (void*)dc, ldc, (void*)hb, ldb, stream)); CHECK_HIP_ERROR(hipStreamSynchronize(stream)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU BLAS =================================================================== */ // reference calculation for(int i1 = 0; i1 < rows; i1++) { for(int i2 = 0; i2 < cols; i2++) { hb_ref[i1 + i2 * ldb] = ha[i1 + i2 * lda]; } } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(rows, cols, ldb, hb, hb_ref); } if(arg.norm_check) { hipblas_error = norm_check_general('F', rows, cols, ldb, hb, hb_ref); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSetMatrixAsyncFn( rows, cols, sizeof(T), (void*)ha, lda, (void*)dc, ldc, stream)); CHECK_HIPBLAS_ERROR(hipblasGetMatrixAsyncFn( rows, cols, sizeof(T), (void*)dc, ldc, (void*)hb, ldb, stream)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSetGetMatrixAsyncModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, set_get_matrix_gbyte_count(rows, cols), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_set_get_vector.hpp000066400000000000000000000120131434647641600242640ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSetGetVectorModel = ArgumentModel; inline void testname_set_get_vector(const Arguments& arg, std::string& name) { hipblasSetGetVectorModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_set_get_vector(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSetVectorFn = FORTRAN ? hipblasSetVectorFortran : hipblasSetVector; auto hipblasGetVectorFn = FORTRAN ? hipblasGetVectorFortran : hipblasGetVector; int M = arg.M; int incx = arg.incx; int incy = arg.incy; int incd = arg.incd; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; hipblasStatus_t status_set = HIPBLAS_STATUS_SUCCESS; hipblasStatus_t status_get = HIPBLAS_STATUS_SUCCESS; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(M < 0 || incx <= 0 || incy <= 0 || incd <= 0) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hx(M * incx); host_vector hy(M * incy); host_vector hy_ref(M * incy); device_vector db(M * incd); double hipblas_error = 0.0, gpu_time_used = 0.0; hipblasLocalHandle handle(arg); // Initial Data on CPU srand(1); hipblas_init(hx, 1, M, incx); hipblas_init(hy, 1, M, incy); hy_ref = hy; /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetVectorFn(M, sizeof(T), (void*)hx, incx, (void*)db, incd)); CHECK_HIPBLAS_ERROR(hipblasGetVectorFn(M, sizeof(T), (void*)db, incd, (void*)hy, incy)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU BLAS =================================================================== */ // reference calculation for(int i = 0; i < M; i++) { hy_ref[i * incy] = hx[i * incx]; } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, incy, hy, hy_ref); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, incy, hy, hy_ref); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSetVectorFn(M, sizeof(T), (void*)hx, incx, (void*)db, incd)); CHECK_HIPBLAS_ERROR(hipblasGetVectorFn(M, sizeof(T), (void*)db, incd, (void*)hy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSetGetVectorModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, set_get_vector_gbyte_count(M), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_set_get_vector_async.hpp000066400000000000000000000122371434647641600254710ustar00rootroot00000000000000/* ************************************************************************ * Copyright 
(C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSetGetVectorAsyncModel = ArgumentModel; inline void testname_set_get_vector_async(const Arguments& arg, std::string& name) { hipblasSetGetVectorAsyncModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_set_get_vector_async(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSetVectorAsyncFn = FORTRAN ? hipblasSetVectorAsyncFortran : hipblasSetVectorAsync; auto hipblasGetVectorAsyncFn = FORTRAN ? hipblasGetVectorAsyncFortran : hipblasGetVectorAsync; int M = arg.M; int incx = arg.incx; int incy = arg.incy; int incd = arg.incd; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(M < 0 || incx <= 0 || incy <= 0 || incd <= 0) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
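/* Editorial sketch, not part of the original test: the strided host <-> device
 * vector copies this async test exercises. incx/incd/incy are element strides
 * on the host source, the device buffer, and the host destination; the helper
 * name is arbitrary and <vector> plus the HIP/hipBLAS headers pulled in by
 * this client file are assumed. */
static hipblasStatus_t example_set_get_vector_async(hipStream_t stream)
{
    const int n = 8, incx = 2, incd = 1, incy = 3;

    std::vector<float> hx(size_t(n) * incx, 1.0f); // host source, stride incx
    std::vector<float> hy(size_t(n) * incy, 0.0f); // host destination, stride incy

    float* db = nullptr; // device buffer, stride incd
    if(hipMalloc(&db, sizeof(float) * size_t(n) * incd) != hipSuccess)
        return HIPBLAS_STATUS_ALLOC_FAILED;

    // Enqueue host -> device and device -> host copies on the given stream.
    hipblasStatus_t status
        = hipblasSetVectorAsync(n, sizeof(float), hx.data(), incx, db, incd, stream);
    if(status == HIPBLAS_STATUS_SUCCESS)
        status = hipblasGetVectorAsync(n, sizeof(float), db, incd, hy.data(), incy, stream);

    // Both copies are asynchronous: wait before reading hy on the host.
    if(status == HIPBLAS_STATUS_SUCCESS && hipStreamSynchronize(stream) != hipSuccess)
        status = HIPBLAS_STATUS_EXECUTION_FAILED;

    (void)hipFree(db);
    return status;
}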
hK is in CPU (host) memory host_vector hx(M * incx); host_vector hy(M * incy); host_vector hy_ref(M * incy); device_vector db(M * incd); double hipblas_error = 0.0, gpu_time_used = 0.0; hipblasLocalHandle handle(arg); hipStream_t stream; hipblasGetStream(handle, &stream); // Initial Data on CPU srand(1); hipblas_init(hx, 1, M, incx); hipblas_init(hy, 1, M, incy); hy_ref = hy; /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR( hipblasSetVectorAsyncFn(M, sizeof(T), (void*)hx, incx, (void*)db, incd, stream)); CHECK_HIPBLAS_ERROR( hipblasGetVectorAsyncFn(M, sizeof(T), (void*)db, incd, (void*)hy, incy, stream)); CHECK_HIP_ERROR(hipStreamSynchronize(stream)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== CPU BLAS =================================================================== */ // reference calculation for(int i = 0; i < M; i++) { hy_ref[i * incy] = hx[i * incx]; } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, incy, hy.data(), hy_ref.data()); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, incy, hy, hy_ref); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasSetVectorAsyncFn(M, sizeof(T), (void*)hx, incx, (void*)db, incd, stream)); CHECK_HIPBLAS_ERROR( hipblasGetVectorAsyncFn(M, sizeof(T), (void*)db, incd, (void*)hy, incy, stream)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSetGetVectorAsyncModel{}.log_args(std::cout, arg, gpu_time_used, ArgumentLogging::NA_value, set_get_vector_gbyte_count(M), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spmv.hpp000066400000000000000000000162221434647641600222430ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSpmvModel = ArgumentModel; inline void testname_spmv(const Arguments& arg, std::string& name) { hipblasSpmvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_spmv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSpmvFn = FORTRAN ? hipblasSpmv : hipblasSpmv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int incx = arg.incx; int incy = arg.incy; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t x_size = size_t(M) * abs_incx; size_t y_size = size_t(M) * abs_incy; size_t A_size = size_t(M) * (M + 1) / 2; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || !incx || !incy; if(invalid_size || !M) { hipblasStatus_t actual = hipblasSpmvFn( handle, uplo, M, nullptr, nullptr, nullptr, incx, nullptr, nullptr, incy); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hy(y_size); host_vector hy_cpu(y_size); host_vector hy_host(y_size); host_vector hy_device(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix(hA, arg, A_size, 1, 1, 0, 1, hipblas_client_alpha_sets_nan, true, false); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, M, abs_incy, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasSpmvFn(handle, uplo, M, &h_alpha, dA, dx, incx, &h_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasSpmvFn(handle, uplo, M, d_alpha, dA, dx, incx, d_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU 
BLAS =================================================================== */ cblas_spmv(uplo, M, h_alpha, hA.data(), hx.data(), incx, h_beta, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, abs_incy, hy_cpu, hy_host); unit_check_general(1, M, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_host); hipblas_error_device = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_device); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasSpmvFn(handle, uplo, M, d_alpha, dA, dx, incx, d_beta, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSpmvModel{}.log_args(std::cout, arg, gpu_time_used, spmv_gflop_count(M), spmv_gbyte_count(M), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spmv_batched.hpp000066400000000000000000000214131434647641600237130ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSpmvBatchedModel = ArgumentModel; inline void testname_spmv_batched(const Arguments& arg, std::string& name) { hipblasSpmvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_spmv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSpmvBatchedFn = FORTRAN ? hipblasSpmvBatched : hipblasSpmvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; int abs_incy = incy >= 0 ? 
incy : -incy; size_t A_size = size_t(M) * (M + 1) / 2; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || !incx || !incy || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasSpmvBatchedFn( handle, uplo, M, nullptr, nullptr, nullptr, incx, nullptr, nullptr, incy, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hy(M, incy, batch_count); host_batch_vector hy_cpu(M, incy, batch_count); host_batch_vector hy_host(M, incy, batch_count); host_batch_vector hy_device(M, incy, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(M, incx, batch_count); device_batch_vector dy(M, incy, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, hipblas_client_beta_sets_nan); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSpmvBatchedFn(handle, uplo, M, &h_alpha, dA.ptr_on_device(), dx.ptr_on_device(), incx, &h_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSpmvBatchedFn(handle, uplo, M, d_alpha, dA.ptr_on_device(), dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_spmv(uplo, M, h_alpha, hA[b], hx[b], incx, h_beta, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, M, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(dy.transfer_from(hy)); hipStream_t stream; 
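// NOTE (editorial addition, not in the original source): the timing loop below
// follows the pattern used throughout these clients: arg.cold_iters warm-up
// calls run before the clock starts, then arg.iters calls are timed, and
// get_time_us_sync() synchronizes on the handle's stream so the measurement
// covers completed GPU work rather than just kernel launches.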
CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasSpmvBatchedFn(handle, uplo, M, d_alpha, dA.ptr_on_device(), dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSpmvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, spmv_gflop_count(M), spmv_gbyte_count(M), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spmv_strided_batched.hpp000066400000000000000000000264421434647641600254400ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSpmvStridedBatchedModel = ArgumentModel; inline void testname_spmv_strided_batched(const Arguments& arg, std::string& name) { hipblasSpmvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_spmv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSpmvStridedBatchedFn = FORTRAN ? hipblasSpmvStridedBatched : hipblasSpmvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? 
incy : -incy; size_t dim_A = size_t(M) * (M + 1) / 2; hipblasStride stride_A = dim_A * stride_scale; hipblasStride stride_x = size_t(M) * abs_incx * stride_scale; hipblasStride stride_y = size_t(M) * abs_incy * stride_scale; size_t A_size = stride_A * batch_count; size_t X_size = stride_x * batch_count; size_t Y_size = stride_y * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || !incx || !incy || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasSpmvStridedBatchedFn(handle, uplo, M, nullptr, nullptr, stride_A, nullptr, incx, stride_x, nullptr, nullptr, incy, stride_y, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix( hA, arg, dim_A, 1, 1, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, M, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, M, abs_incy, stride_y, batch_count, hipblas_client_beta_sets_nan); hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSpmvStridedBatchedFn(handle, uplo, M, &h_alpha, dA, stride_A, dx, incx, stride_x, &h_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSpmvStridedBatchedFn(handle, uplo, M, d_alpha, dA, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_spmv(uplo, M, h_alpha, hA.data() + b * stride_A, hx.data() + b * stride_x, incx, h_beta, hy_cpu.data() + b * stride_y, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { 
unit_check_general(1, M, batch_count, abs_incy, stride_y, hy_cpu, hy_host); unit_check_general(1, M, batch_count, abs_incy, stride_y, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, M, abs_incy, stride_y, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, M, abs_incy, stride_y, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasSpmvStridedBatchedFn(handle, uplo, M, d_alpha, dA, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSpmvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, spmv_gflop_count(M), spmv_gbyte_count(M), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spr.hpp000066400000000000000000000145571434647641600220730ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSprModel = ArgumentModel; inline void testname_spr(const Arguments& arg, std::string& name) { hipblasSprModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_spr(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSprFn = FORTRAN ? hipblasSpr : hipblasSpr; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int abs_incx = incx < 0 ? 
-incx : incx; size_t A_size = size_t(N) * (N + 1) / 2; size_t x_size = abs_incx * size_t(N); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx; if(invalid_size || !N) { hipblasStatus_t actual = hipblasSprFn(handle, uplo, N, nullptr, nullptr, incx, nullptr); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); device_vector dA(A_size); device_vector dx(x_size); device_vector d_alpha(1); T h_alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix(hA, arg, A_size, 1, 1, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSprFn(handle, uplo, N, &h_alpha, dx, incx, dA)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSprFn(handle, uplo, N, d_alpha, dx, incx, dA)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_spr(uplo, N, h_alpha, hx.data(), incx, hA_cpu.data()); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_size, 1, hA_cpu.data(), hA_host.data()); unit_check_general(1, A_size, 1, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, A_size, 1, hA_cpu.data(), hA_host.data()); hipblas_error_device = norm_check_general('F', 1, A_size, 1, hA_cpu.data(), hA_device.data()); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSprFn(handle, uplo, N, d_alpha, dx, incx, dA)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSprModel{}.log_args(std::cout, arg, gpu_time_used, spr_gflop_count(N), spr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return 
HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spr2.hpp000066400000000000000000000155241434647641600221500ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSpr2Model = ArgumentModel; inline void testname_spr2(const Arguments& arg, std::string& name) { hipblasSpr2Model{}.test_name(arg, name); } template inline hipblasStatus_t testing_spr2(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSpr2Fn = FORTRAN ? hipblasSpr2 : hipblasSpr2; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; size_t A_size = size_t(N) * (N + 1) / 2; size_t x_size = abs_incx * size_t(N); size_t y_size = abs_incy * size_t(N); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy; if(invalid_size || !N) { hipblasStatus_t actual = hipblasSpr2Fn(handle, uplo, N, nullptr, nullptr, incx, nullptr, incy, nullptr); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
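/* Editorial sketch, not part of the original test: the packed symmetric rank-2
 * update A := alpha*x*y**T + alpha*y*x**T + A that testing_spr2 verifies, as a
 * single host-pointer-mode call. The packed triangle AP holds n*(n+1)/2
 * elements; dx, dy and dA are device buffers assumed to be populated already,
 * and the helper name is arbitrary. */
static hipblasStatus_t example_spr2(hipblasHandle_t handle,
                                    int             n,
                                    float           alpha,
                                    const float*    dx, // n * |incx| elements
                                    int             incx,
                                    const float*    dy, // n * |incy| elements
                                    int             incy,
                                    float*          dA) // n*(n+1)/2 packed elements
{
    // alpha is read from host memory under HIPBLAS_POINTER_MODE_HOST (the default).
    return hipblasSspr2(handle, HIPBLAS_FILL_MODE_UPPER, n, &alpha, dx, incx, dy, incy, dA);
}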
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); T h_alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix(hA, arg, A_size, 1, 1, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan); hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSpr2Fn(handle, uplo, N, &h_alpha, dx, incx, dy, incy, dA)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSpr2Fn(handle, uplo, N, d_alpha, dx, incx, dy, incy, dA)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_spr2(uplo, N, h_alpha, hx.data(), incx, hy.data(), incy, hA_cpu.data()); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_size, 1, hA_cpu.data(), hA_host.data()); unit_check_general(1, A_size, 1, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, A_size, 1, hA_cpu.data(), hA_host.data()); hipblas_error_device = norm_check_general('F', 1, A_size, 1, hA_cpu.data(), hA_device.data()); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSpr2Fn(handle, uplo, N, d_alpha, dx, incx, dy, incy, dA)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSpr2Model{}.log_args(std::cout, arg, gpu_time_used, spr2_gflop_count(N), spr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spr2_batched.hpp000066400000000000000000000206741434647641600236240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 
Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSpr2BatchedModel = ArgumentModel; inline void testname_spr2_batched(const Arguments& arg, std::string& name) { hipblasSpr2BatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_spr2_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSpr2BatchedFn = FORTRAN ? hipblasSpr2Batched : hipblasSpr2Batched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; size_t A_size = size_t(N) * (N + 1) / 2; T h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasSpr2BatchedFn( handle, uplo, N, nullptr, nullptr, incx, nullptr, incy, nullptr, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; // Naming: dK is in GPU (device) memory. 
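// NOTE (editorial addition, not in the original source): the batched API used
// below takes device arrays of device pointers, one pointer per problem in the
// batch. device_batch_vector<T>::ptr_on_device() supplies that pointer array
// for the hipBLAS call, while indexing the host_batch_vector objects (hA[b],
// hx[b], hy[b]) yields the per-batch host copies consumed by the cblas_spr2
// reference loop.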
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan); hA_cpu.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSpr2BatchedFn(handle, uplo, N, &h_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSpr2BatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_spr2(uplo, N, h_alpha, hx[b], incx, hy[b], incy, hA_cpu[b]); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_size, batch_count, 1, hA_cpu, hA_host); unit_check_general(1, A_size, batch_count, 1, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSpr2BatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSpr2BatchedModel{}.log_args(std::cout, arg, gpu_time_used, spr2_gflop_count(N), spr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spr2_strided_batched.hpp000066400000000000000000000254141434647641600253370ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 
Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSpr2StridedBatchedModel = ArgumentModel; inline void testname_spr2_strided_batched(const Arguments& arg, std::string& name) { hipblasSpr2StridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_spr2_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSpr2StridedBatchedFn = FORTRAN ? hipblasSpr2StridedBatched : hipblasSpr2StridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; int A_dim = N * (N + 1) / 2; hipblasStride strideA = A_dim * stride_scale; hipblasStride stridex = abs_incx * N * stride_scale; hipblasStride stridey = abs_incy * N * stride_scale; size_t A_size = strideA * batch_count; size_t x_size = stridex * batch_count; size_t y_size = stridey * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasSpr2StridedBatchedFn(handle, uplo, N, nullptr, nullptr, incx, stridex, nullptr, incy, stridey, nullptr, strideA, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
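/* Editorial sketch, not part of the original test: the strided-batched form of
 * the packed rank-2 update. Problem i reads x + i*stridex and y + i*stridey and
 * updates AP + i*strideA, so strideA must be at least n*(n+1)/2 and each vector
 * stride at least n*|inc|. Positive increments are assumed, buffers are assumed
 * populated, and the helper name is arbitrary. */
static hipblasStatus_t example_spr2_strided_batched(hipblasHandle_t handle,
                                                    int             n,
                                                    float           alpha,
                                                    const float*    dx,
                                                    int             incx,
                                                    const float*    dy,
                                                    int             incy,
                                                    float*          dA,
                                                    int             batch_count)
{
    const hipblasStride stridex = hipblasStride(n) * incx; // x_i = dx + i*stridex
    const hipblasStride stridey = hipblasStride(n) * incy; // y_i = dy + i*stridey
    const hipblasStride strideA = hipblasStride(n) * (n + 1) / 2; // packed A_i

    return hipblasSspr2StridedBatched(handle,
                                      HIPBLAS_FILL_MODE_UPPER,
                                      n,
                                      &alpha,
                                      dx,
                                      incx,
                                      stridex,
                                      dy,
                                      incy,
                                      stridey,
                                      dA,
                                      strideA,
                                      batch_count);
}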
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); T h_alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix( hA, arg, A_dim, 1, 1, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, N, abs_incy, stridey, batch_count, hipblas_client_alpha_sets_nan); hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSpr2StridedBatchedFn(handle, uplo, N, &h_alpha, dx, incx, stridex, dy, incy, stridey, dA, strideA, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSpr2StridedBatchedFn(handle, uplo, N, d_alpha, dx, incx, stridex, dy, incy, stridey, dA, strideA, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_spr2(uplo, N, h_alpha, hx.data() + b * stridex, incx, hy.data() + b * stridey, incy, hA_cpu.data() + b * strideA); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_dim, batch_count, 1, strideA, hA_cpu, hA_host); unit_check_general(1, A_dim, batch_count, 1, strideA, hA_cpu, hA_host); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, A_dim, 1, strideA, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', 1, A_dim, 1, strideA, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSpr2StridedBatchedFn(handle, uplo, N, d_alpha, dx, incx, stridex, dy, incy, stridey, dA, strideA, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSpr2StridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, 
spr2_gflop_count(N), spr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spr_batched.hpp000066400000000000000000000160361434647641600235370ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSprBatchedModel = ArgumentModel; inline void testname_spr_batched(const Arguments& arg, std::string& name) { hipblasSprBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_spr_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSprBatchedFn = FORTRAN ? hipblasSprBatched : hipblasSprBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; size_t A_size = size_t(N) * (N + 1) / 2; T h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasSprBatchedFn(handle, uplo, N, nullptr, nullptr, incx, nullptr, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; // Naming: dK is in GPU (device) memory. 
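/* Editorial sketch, not part of the original test: the batched packed rank-1
 * update A_i := alpha*x_i*x_i**T + A_i exercised here. dx_array and dA_array
 * are device arrays of batch_count device pointers (the ptr_on_device() arrays
 * used below); all buffers are assumed populated and the helper name is
 * arbitrary. */
static hipblasStatus_t example_spr_batched(hipblasHandle_t    handle,
                                           int                n,
                                           const float*       alpha, // host or device per pointer mode
                                           const float* const dx_array[],
                                           int                incx,
                                           float* const       dA_array[],
                                           int                batch_count)
{
    return hipblasSsprBatched(
        handle, HIPBLAS_FILL_MODE_UPPER, n, alpha, dx_array, incx, dA_array, batch_count);
}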
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hA_cpu.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSprBatchedFn( handle, uplo, N, &h_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSprBatchedFn( handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_spr(uplo, N, h_alpha, hx[b], incx, hA_cpu[b]); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_size, batch_count, 1, hA_cpu, hA_host); unit_check_general(1, A_size, batch_count, 1, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', 1, A_size, 1, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSprBatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSprBatchedModel{}.log_args(std::cout, arg, gpu_time_used, spr_gflop_count(N), spr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_spr_strided_batched.hpp000066400000000000000000000165611434647641600252600ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSprStridedBatchedModel = ArgumentModel; inline void testname_spr_strided_batched(const Arguments& arg, std::string& name) { hipblasSprStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_spr_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSprStridedBatchedFn = FORTRAN ? hipblasSprStridedBatched : hipblasSprStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; int A_dim = N * (N + 1) / 2; hipblasStride strideA = A_dim * stride_scale; hipblasStride stridex = abs_incx * N * stride_scale; size_t A_size = strideA * batch_count; size_t x_size = stridex * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasSprStridedBatchedFn( handle, uplo, N, nullptr, nullptr, incx, stridex, nullptr, strideA, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); device_vector dA(A_size); device_vector dx(x_size); device_vector d_alpha(1); T h_alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix( hA, arg, A_dim, 1, 1, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, false, true); hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSprStridedBatchedFn( handle, uplo, N, &h_alpha, dx, incx, stridex, dA, strideA, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSprStridedBatchedFn( handle, uplo, N, d_alpha, dx, incx, stridex, dA, strideA, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_spr( uplo, N, h_alpha, hx.data() + b * stridex, incx, hA_cpu.data() + b * strideA); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, A_dim, batch_count, 1, strideA, hA_cpu, hA_host); unit_check_general(1, A_dim, batch_count, 1, strideA, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, A_dim, 1, strideA, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', 1, A_dim, 1, strideA, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSprStridedBatchedFn( handle, uplo, N, d_alpha, dx, incx, stridex, dA, strideA, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSprStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, spr_gflop_count(N), spr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_swap.hpp000066400000000000000000000130741434647641600222320ustar00rootroot00000000000000/* 
************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSwapModel = ArgumentModel; inline void testname_swap(const Arguments& arg, std::string& name) { hipblasSwapModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_swap(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSwapFn = FORTRAN ? hipblasSwap : hipblasSwap; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int unit_check = arg.unit_check; int norm_check = arg.norm_check; int timing = arg.timing; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0) { CHECK_HIPBLAS_ERROR(hipblasSwapFn(handle, N, nullptr, incx, nullptr, incy)); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t sizeX = size_t(N) * abs_incx; size_t sizeY = size_t(N) * abs_incy; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy(sizeY); host_vector hx_cpu(sizeX); host_vector hy_cpu(sizeY); // allocate memory on device device_vector dx(sizeX); device_vector dy(sizeY); int device_pointer = 1; double gpu_time_used = 0.0, cpu_time_used = 0.0; double hipblas_error = 0.0; // Initial Data on CPU hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan, false); hx_cpu = hx; hy_cpu = hy; // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); if(unit_check || norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSwapFn(handle, N, dx, incx, dy, incy)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx.data(), dx, sizeof(T) * sizeX, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy.data(), dy, sizeof(T) * sizeY, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_swap(N, hx.data(), incx, hy.data(), incy); if(unit_check) { unit_check_general(1, N, abs_incx, hx_cpu.data(), hx.data()); unit_check_general(1, N, abs_incy, hy_cpu.data(), hy.data()); } if(norm_check) { hipblas_error = std::max(norm_check_general('F', 1, N, abs_incx, hx_cpu.data(), hx.data()), norm_check_general('F', 1, N, abs_incy, hy_cpu.data(), hy.data())); } } // end of if unit/norm check if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSwapFn(handle, N, dx, incx, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSwapModel{}.log_args(std::cout, arg, gpu_time_used, swap_gflop_count(N), swap_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_swap_batched.hpp000066400000000000000000000131411434647641600236770ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSwapBatchedModel = ArgumentModel; inline void testname_swap_batched(const Arguments& arg, std::string& name) { hipblasSwapBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_swap_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSwapBatchedFn = FORTRAN ? hipblasSwapBatched : hipblasSwapBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; int unit_check = arg.unit_check; int norm_check = arg.norm_check; int timing = arg.timing; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR( hipblasSwapBatchedFn(handle, N, nullptr, incx, nullptr, incy, batch_count)); return HIPBLAS_STATUS_SUCCESS; } int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; double hipblas_error = 0.0; double gpu_time_used = 0.0; // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory, plz follow this practice host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); host_batch_vector hx_cpu(N, incx, batch_count); host_batch_vector hy_cpu(N, incy, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan, false); hx_cpu.copy_from(hx); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); if(unit_check || norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSwapBatchedFn( handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hx.transfer_from(dx)); CHECK_HIP_ERROR(hy.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_swap(N, hx_cpu[b], incx, hy_cpu[b], incy); } if(unit_check) { unit_check_general(1, N, batch_count, abs_incx, hx_cpu, hx); unit_check_general(1, N, batch_count, abs_incy, hy_cpu, hy); } if(norm_check) { hipblas_error = std::max(norm_check_general('F', 1, N, abs_incx, hx_cpu, hx, batch_count), norm_check_general('F', 1, N, abs_incy, hy_cpu, hy, batch_count)); } } // end of if unit/norm check if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSwapBatchedFn( handle, N, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = 
get_time_us_sync(stream) - gpu_time_used; hipblasSwapBatchedModel{}.log_args(std::cout, arg, gpu_time_used, swap_gflop_count(N), swap_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_swap_strided_batched.hpp000066400000000000000000000142211434647641600254150ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSwapStridedBatchedModel = ArgumentModel; inline void testname_swap_strided_batched(const Arguments& arg, std::string& name) { hipblasSwapStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_swap_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSwapStridedBatchedFn = FORTRAN ? hipblasSwapStridedBatched : hipblasSwapStridedBatched; int N = arg.N; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int unit_check = arg.unit_check; int norm_check = arg.norm_check; int timing = arg.timing; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stridex = size_t(N) * abs_incx * stride_scale; hipblasStride stridey = size_t(N) * abs_incy * stride_scale; size_t sizeX = stridex * batch_count; size_t sizeY = stridey * batch_count; if(!sizeX) sizeX = 1; if(!sizeY) sizeY = 1; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N <= 0 || batch_count <= 0) { CHECK_HIPBLAS_ERROR(hipblasSwapStridedBatchedFn( handle, N, nullptr, incx, stridex, nullptr, incy, stridey, batch_count)); return HIPBLAS_STATUS_SUCCESS; } // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory, plz follow this practice host_vector hx(sizeX); host_vector hy(sizeY); host_vector hx_cpu(sizeX); host_vector hy_cpu(sizeY); device_vector dx(sizeX); device_vector dy(sizeY); double hipblas_error = 0.0; double gpu_time_used = 0.0; // Initial Data on CPU hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector( hy, arg, N, abs_incy, stridey, batch_count, hipblas_client_alpha_sets_nan, true); hx_cpu = hx; hy_cpu = hy; // copy data from CPU to device, does not work for incx != 1 CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * sizeY, hipMemcpyHostToDevice)); if(unit_check || norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSwapStridedBatchedFn( handle, N, dx, incx, stridex, dy, incy, stridey, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx.data(), dx, sizeof(T) * sizeX, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(hy.data(), dy, sizeof(T) * sizeY, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_swap(N, hx.data() + b * stridex, incx, hy.data() + b * stridey, incy); } if(unit_check) { unit_check_general(1, N, batch_count, abs_incy, stridey, hy_cpu.data(), hy.data()); } if(norm_check) { hipblas_error = norm_check_general('F', 1, N, abs_incy, stridey, hy_cpu, hy, batch_count); } } // end of if unit/norm check if(timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSwapStridedBatchedFn( handle, N, dx, incx, stridex, dy, incy, stridey, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSwapStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, swap_gflop_count(N), swap_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_symm.hpp000066400000000000000000000155721434647641600222520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSymmModel = ArgumentModel; inline void testname_symm(const Arguments& arg, std::string& name) { hipblasSymmModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_symm(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSymmFn = FORTRAN ? hipblasSymm : hipblasSymm; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); size_t rows = (side == HIPBLAS_SIDE_LEFT ? N : M); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || ldc < M || ldb < M || lda < K) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; size_t C_size = size_t(ldc) * N; host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix(hA, arg, rows, K, lda, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_matrix(hB, arg, M, N, ldb, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_matrix(hC_host, arg, M, N, ldc, 0, 1, hipblas_client_beta_sets_nan); hC_gold = hC_host; hC_device = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasSymmFn(handle, side, uplo, M, N, &h_alpha, dA, lda, dB, ldb, &h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasSymmFn(handle, side, uplo, M, N, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* 
===================================================================== CPU BLAS =================================================================== */ cblas_symm(side, uplo, M, N, h_alpha, hA, lda, hB, ldb, h_beta, hC_gold, ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, ldc, hC_gold, hC_host); unit_check_general(M, N, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, hC_gold, hC_host); hipblas_error_device = norm_check_general('F', M, N, ldc, hC_gold, hC_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSymmFn( handle, side, uplo, M, N, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSymmModel{}.log_args(std::cout, arg, gpu_time_used, symm_gflop_count(M, N, K), symm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_symm_batched.hpp000066400000000000000000000224661434647641600237240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSymmBatchedModel = ArgumentModel; inline void testname_symm_batched(const Arguments& arg, std::string& name) { hipblasSymmBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_symm_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSymmBatchedFn = FORTRAN ? 
hipblasSymmBatched : hipblasSymmBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(M < 0 || N < 0 || lda < K || ldb < M || ldc < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; size_t C_size = size_t(ldc) * N; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // host arrays host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hB, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hC_host, arg, hipblas_client_beta_sets_nan); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSymmBatchedFn(handle, side, uplo, M, N, &h_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, &h_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSymmBatchedFn(handle, side, uplo, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_symm( side, uplo, M, N, h_alpha, hA[b], lda, hB[b], ldb, h_beta, hC_gold[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(M, N, batch_count, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = 
norm_check_general('F', M, N, ldc, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSymmBatchedFn(handle, side, uplo, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSymmBatchedModel{}.log_args(std::cout, arg, gpu_time_used, symm_gflop_count(M, N, K), symm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_symm_strided_batched.hpp000066400000000000000000000267021434647641600254370ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSymmStridedBatchedModel = ArgumentModel; inline void testname_symm_strided_batched(const Arguments& arg, std::string& name) { hipblasSymmStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_symm_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSymmStridedBatchedFn = FORTRAN ? hipblasSymmStridedBatched : hipblasSymmStridedBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); size_t rows = (side == HIPBLAS_SIDE_LEFT ? N : M); int K = (side == HIPBLAS_SIDE_LEFT ? 
M : N); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(M < 0 || N < 0 || lda < K || ldb < M || ldc < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } hipblasStride stride_A = size_t(lda) * K * stride_scale; hipblasStride stride_B = size_t(ldb) * N * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t B_size = stride_B * batch_count; size_t C_size = stride_C * batch_count; // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix( hA, arg, rows, K, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_matrix( hB, arg, M, N, ldb, stride_B, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_matrix( hC_host, arg, M, N, ldc, stride_C, batch_count, hipblas_client_beta_sets_nan); hC_gold = hC_host; hC_device = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSymmStridedBatchedFn(handle, side, uplo, M, N, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSymmStridedBatchedFn(handle, side, uplo, M, N, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_symm(side, uplo, M, N, h_alpha, hA + b * stride_A, lda, hB + b * stride_B, ldb, h_beta, hC_gold + b * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(M, N, batch_count, ldc, stride_C, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldc, stride_C, hC_gold, hC_host, batch_count); hipblas_error_device = 
norm_check_general('F', M, N, ldc, stride_C, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSymmStridedBatchedFn(handle, side, uplo, M, N, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSymmStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, symm_gflop_count(M, N, K), symm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_symv.hpp000066400000000000000000000165261434647641600222630ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSymvModel = ArgumentModel; inline void testname_symv(const Arguments& arg, std::string& name) { hipblasSymvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_symv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSymvFn = FORTRAN ? hipblasSymv : hipblasSymv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; size_t x_size = size_t(M) * abs_incx; size_t y_size = size_t(M) * abs_incy; size_t A_size = size_t(lda) * M; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx || !incy; if(invalid_size || !M) { hipblasStatus_t actual = hipblasSymvFn( handle, uplo, M, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hy(y_size); host_vector hy_cpu(y_size); host_vector hy_host(y_size); host_vector hy_device(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix(hA, arg, M, M, lda, 0, 1, hipblas_client_alpha_sets_nan, true, false); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, M, abs_incy, 0, 1, hipblas_client_beta_sets_nan); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasSymvFn(handle, uplo, M, &h_alpha, dA, lda, dx, incx, &h_beta, dy, incy)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasSymvFn(handle, uplo, M, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_symv( uplo, M, h_alpha, hA.data(), lda, hx.data(), incx, h_beta, hy_cpu.data(), incy); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, abs_incy, hy_cpu, hy_host); unit_check_general(1, M, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, M, abs_incy, hy_cpu.data(), hy_host.data()); hipblas_error_device = norm_check_general('F', 1, M, abs_incy, hy_cpu.data(), hy_device.data()); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * M * incy, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasSymvFn(handle, uplo, M, d_alpha, dA, lda, dx, incx, d_beta, dy, incy)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
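// Note on the timing scheme used by these clients: the loop above issues arg.cold_iters
// warm-up launches followed by arg.iters timed launches. The timer is only started once
// iter == arg.cold_iters, and get_time_us_sync() synchronizes on the stream, so
// gpu_time_used (microseconds) covers exactly the arg.iters timed calls. An illustrative,
// non-authoritative way to derive a per-call average from the values already in scope:
//     double avg_us_per_call = gpu_time_used / arg.iters;  // sketch only, not part of the test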
hipblasSymvModel{}.log_args(std::cout, arg, gpu_time_used, symv_gflop_count(M), symv_gbyte_count(M), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_symv_batched.hpp000066400000000000000000000231541434647641600237300ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSymvBatchedModel = ArgumentModel; inline void testname_symv_batched(const Arguments& arg, std::string& name) { hipblasSymvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_symv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSymvBatchedFn = FORTRAN ? hipblasSymvBatched : hipblasSymvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; int batch_count = arg.batch_count; int abs_incy = incy >= 0 ? incy : -incy; size_t A_size = size_t(lda) * M; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx || !incy || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasSymvBatchedFn(handle, uplo, M, nullptr, nullptr, lda, nullptr, incx, nullptr, nullptr, incy, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hy(M, incy, batch_count); host_batch_vector hy_cpu(M, incy, batch_count); host_batch_vector hy_host(M, incy, batch_count); host_batch_vector hy_device(M, incy, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(M, incx, batch_count); device_batch_vector dy(M, incy, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, hipblas_client_beta_sets_nan); hy_cpu.copy_from(hy); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSymvBatchedFn(handle, uplo, M, &h_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, &h_beta, dy.ptr_on_device(), incy, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hy_host.transfer_from(dy)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSymvBatchedFn(handle, uplo, M, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); CHECK_HIP_ERROR(hy_device.transfer_from(dy)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_symv(uplo, M, h_alpha, hA[b], lda, hx[b], incx, h_beta, hy_cpu[b], incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incy, hy_cpu, hy_host); unit_check_general(1, M, batch_count, abs_incy, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general('F', 1, M, abs_incy, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(dy.transfer_from(hy)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSymvBatchedFn(handle, uplo, M, d_alpha, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, d_beta, dy.ptr_on_device(), incy, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
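// The batched benchmark is timed the same way as the non-batched SYMV test: only the
// arg.iters launches after the warm-up fall inside the timed window, and each timed
// hipblasSymvBatchedFn call dispatches all batch_count SYMV problems at once using the
// device-side alpha/beta (pointer mode was set to HIPBLAS_POINTER_MODE_DEVICE above).
// gpu_time_used, together with symv_gflop_count(M)/symv_gbyte_count(M) and the accumulated
// errors, is then passed to log_args below to produce the reported performance numbers.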
hipblasSymvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, symv_gflop_count(M), symv_gbyte_count(M), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_symv_strided_batched.hpp000066400000000000000000000277431434647641600254560ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSymvStridedBatchedModel = ArgumentModel; inline void testname_symv_strided_batched(const Arguments& arg, std::string& name) { hipblasSymvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_symv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSymvStridedBatchedFn = FORTRAN ? hipblasSymvStridedBatched : hipblasSymvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int M = arg.M; int lda = arg.lda; int incx = arg.incx; int incy = arg.incy; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; int abs_incy = incy >= 0 ? incy : -incy; hipblasStride stride_A = size_t(lda) * M * stride_scale; hipblasStride stride_x = size_t(M) * abs_incx * stride_scale; hipblasStride stride_y = size_t(M) * abs_incy * stride_scale; size_t A_size = stride_A * batch_count; size_t X_size = stride_x * batch_count; size_t Y_size = stride_y * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx || !incy || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasSymvStridedBatchedFn(handle, uplo, M, nullptr, nullptr, lda, stride_A, nullptr, incx, stride_x, nullptr, nullptr, incy, stride_y, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hy(Y_size); host_vector hy_cpu(Y_size); host_vector hy_host(Y_size); host_vector hy_device(Y_size); device_vector dA(A_size); device_vector dx(X_size); device_vector dy(Y_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix( hA, arg, M, M, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hx, arg, M, abs_incx, stride_x, batch_count, hipblas_client_alpha_sets_nan); hipblas_init_vector(hy, arg, M, abs_incy, stride_y, batch_count, hipblas_client_beta_sets_nan); hy_cpu = hy; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSymvStridedBatchedFn(handle, uplo, M, &h_alpha, dA, lda, stride_A, dx, incx, stride_x, &h_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_host.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSymvStridedBatchedFn(handle, uplo, M, d_alpha, dA, lda, stride_A, dx, incx, stride_x, d_beta, dy, incy, stride_y, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hy_device.data(), dy, sizeof(T) * Y_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_symv(uplo, M, h_alpha, hA.data() + b * stride_A, lda, hx.data() + b * stride_x, incx, h_beta, hy_cpu.data() + b * stride_y, incy); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incy, stride_y, hy_cpu, hy_host); unit_check_general(1, M, batch_count, abs_incy, stride_y, hy_cpu, hy_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', 1, M, abs_incy, stride_y, hy_cpu, hy_host, batch_count); hipblas_error_device = norm_check_general( 'F', 1, M, abs_incy, stride_y, hy_cpu, hy_device, batch_count); } } if(arg.timing) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * Y_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSymvStridedBatchedFn(handle, uplo, M, d_alpha, dA, lda, stride_A, dx, incx, 
stride_x, d_beta, dy, incy, stride_y, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSymvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, symv_gflop_count(M), symv_gbyte_count(M), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syr.hpp000066400000000000000000000147211434647641600220750ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrModel = ArgumentModel; inline void testname_syr(const Arguments& arg, std::string& name) { hipblasSyrModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrFn = FORTRAN ? hipblasSyr : hipblasSyr; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int lda = arg.lda; int abs_incx = incx < 0 ? -incx : incx; size_t A_size = size_t(lda) * N; size_t x_size = abs_incx * size_t(N); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || lda < N || lda < 1; if(invalid_size || !N) { hipblasStatus_t actual = hipblasSyrFn(handle, uplo, N, nullptr, nullptr, incx, nullptr, lda); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); device_vector dA(A_size); device_vector dx(x_size); device_vector d_alpha(1); T h_alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix(hA, arg, N, N, lda, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrFn(handle, uplo, N, &h_alpha, dx, incx, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyrFn(handle, uplo, N, d_alpha, dx, incx, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_syr(uplo, N, h_alpha, hx.data(), incx, hA_cpu.data(), lda); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, lda, hA_cpu.data(), hA_host.data()); unit_check_general(N, N, lda, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, lda, hA_cpu.data(), hA_host.data()); hipblas_error_device = norm_check_general('F', N, N, lda, hA_cpu.data(), hA_device.data()); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrFn(handle, uplo, N, d_alpha, dx, incx, dA, lda)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSyrModel{}.log_args(std::cout, arg, gpu_time_used, syr_gflop_count(N), syr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syr2.hpp000066400000000000000000000156671434647641600221710ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyr2Model = ArgumentModel; inline void testname_syr2(const Arguments& arg, std::string& name) { hipblasSyr2Model{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr2(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyr2Fn = FORTRAN ? hipblasSyr2 : hipblasSyr2; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; size_t A_size = size_t(lda) * N; size_t x_size = abs_incx * size_t(N); size_t y_size = abs_incy * size_t(N); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || lda < N || lda < 1; if(invalid_size || !N) { hipblasStatus_t actual = hipblasSyr2Fn(handle, uplo, N, nullptr, nullptr, incx, nullptr, incy, nullptr, lda); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); T h_alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix(hA, arg, N, N, lda, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, N, abs_incx, 0, 1, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, 0, 1, hipblas_client_alpha_sets_nan); hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyr2Fn(handle, uplo, N, &h_alpha, dx, incx, dy, incy, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyr2Fn(handle, uplo, N, d_alpha, dx, incx, dy, incy, dA, lda)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_syr2(uplo, N, h_alpha, hx.data(), incx, hy.data(), incy, hA_cpu.data(), lda); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, lda, hA_cpu.data(), hA_host.data()); unit_check_general(N, N, lda, hA_cpu.data(), hA_device.data()); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, lda, hA_cpu.data(), hA_host.data()); hipblas_error_device = norm_check_general('F', N, N, lda, hA_cpu.data(), hA_device.data()); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasSyr2Fn(handle, uplo, N, d_alpha, dx, incx, dy, incy, dA, lda)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSyr2Model{}.log_args(std::cout, arg, gpu_time_used, syr2_gflop_count(N), syr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syr2_batched.hpp000066400000000000000000000212631434647641600236300ustar00rootroot00000000000000/* ************************************************************************ * 
Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyr2BatchedModel = ArgumentModel; inline void testname_syr2_batched(const Arguments& arg, std::string& name) { hipblasSyr2BatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr2_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyr2BatchedFn = FORTRAN ? hipblasSyr2Batched : hipblasSyr2Batched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; size_t A_size = size_t(lda) * N; T h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || lda < N || lda < 1 || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasSyr2BatchedFn( handle, uplo, N, nullptr, nullptr, incx, nullptr, incy, nullptr, lda, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hy(N, incy, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_batch_vector dy(N, incy, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); CHECK_HIP_ERROR(dy.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, hipblas_client_alpha_sets_nan); hA_cpu.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dy.transfer_from(hy)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyr2BatchedFn(handle, uplo, N, &h_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyr2BatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_syr2(uplo, N, h_alpha, hx[b], incx, hy[b], incy, hA_cpu[b], lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, lda, hA_cpu, hA_host); unit_check_general(N, N, batch_count, lda, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, lda, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, lda, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyr2BatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dy.ptr_on_device(), incy, dA.ptr_on_device(), lda, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSyr2BatchedModel{}.log_args(std::cout, arg, gpu_time_used, syr2_gflop_count(N), syr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syr2_strided_batched.hpp000066400000000000000000000261371434647641600253530ustar00rootroot00000000000000/* ************************************************************************ * 
Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyr2StridedBatchedModel = ArgumentModel; inline void testname_syr2_strided_batched(const Arguments& arg, std::string& name) { hipblasSyr2StridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr2_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyr2StridedBatchedFn = FORTRAN ? hipblasSyr2StridedBatched : hipblasSyr2StridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int incy = arg.incy; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; int abs_incy = incy < 0 ? -incy : incy; hipblasStride strideA = lda * N * stride_scale; hipblasStride stridex = abs_incx * N * stride_scale; hipblasStride stridey = abs_incy * N * stride_scale; size_t A_size = strideA * batch_count; size_t x_size = stridex * batch_count; size_t y_size = stridey * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || !incy || lda < N || lda < 1 || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasSyr2StridedBatchedFn(handle, uplo, N, nullptr, nullptr, incx, stridex, nullptr, incy, stridey, nullptr, lda, strideA, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); host_vector hy(y_size); device_vector dA(A_size); device_vector dx(x_size); device_vector dy(y_size); device_vector d_alpha(1); T h_alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix( hA, arg, N, N, lda, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, false, true); hipblas_init_vector(hy, arg, N, abs_incy, stridey, batch_count, hipblas_client_alpha_sets_nan); hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dy, hy.data(), sizeof(T) * y_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyr2StridedBatchedFn(handle, uplo, N, &h_alpha, dx, incx, stridex, dy, incy, stridey, dA, lda, strideA, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyr2StridedBatchedFn(handle, uplo, N, d_alpha, dx, incx, stridex, dy, incy, stridey, dA, lda, strideA, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_syr2(uplo, N, h_alpha, hx.data() + b * stridex, incx, hy.data() + b * stridey, incy, hA_cpu.data() + b * strideA, lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, lda, strideA, hA_cpu, hA_host); unit_check_general(N, N, batch_count, lda, strideA, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', N, N, lda, strideA, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', N, N, lda, strideA, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyr2StridedBatchedFn(handle, uplo, N, d_alpha, dx, incx, stridex, dy, incy, stridey, dA, lda, strideA, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSyr2StridedBatchedModel{}.log_args(std::cout, 
arg, gpu_time_used, syr2_gflop_count(N), syr2_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syr2k.hpp000066400000000000000000000160041434647641600223260ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyr2kModel = ArgumentModel; inline void testname_syr2k(const Arguments& arg, std::string& name) { hipblasSyr2kModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr2k(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyr2kFn = FORTRAN ? hipblasSyr2k : hipblasSyr2k; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K))) { return HIPBLAS_STATUS_INVALID_VALUE; } int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t B_size = size_t(ldb) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix(hA, arg, N, K1, lda, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_matrix(hB, arg, N, K1, ldb, 0, 1, hipblas_client_never_set_nan, false, true); hipblas_init_matrix(hC_host, arg, N, N, ldc, 0, 1, hipblas_client_never_set_nan); hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyr2kFn( handle, uplo, transA, N, K, &h_alpha, dA, lda, dB, ldb, &h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasSyr2kFn(handle, uplo, transA, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_syr2k(uplo, transA, N, K, h_alpha, hA, lda, hB, ldb, h_beta, hC_gold, ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, ldc, hC_gold, hC_host); unit_check_general(N, N, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyr2kFn( handle, uplo, transA, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSyr2kModel{}.log_args(std::cout, arg, gpu_time_used, syr2k_gflop_count(N, K), syr2k_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
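For readers who want to see the call pattern the test above exercises outside of the Arguments/ArgumentModel harness, the following is a minimal, self-contained host-side sketch of a single-precision syr2k call against the public hipBLAS C API. It is not taken from the repository: it assumes the hipblasSsyr2k entry point and the HIP runtime, uses the flat <hipblas.h> header available in this release (the include path may differ on other releases), and reduces error handling to a single status check rather than the CHECK_* macros used by the clients.

// Standalone sketch of the call sequence driven by testing_syr2k.hpp.
#include <hip/hip_runtime.h>
#include <hipblas.h>
#include <cstdio>
#include <vector>

int main()
{
    const int   n = 4, k = 3, lda = n, ldb = n, ldc = n;
    const float alpha = 1.0f, beta = 0.0f;

    // Column-major host data: A and B are n x k, C is n x n.
    std::vector<float> hA(size_t(lda) * k, 1.0f);
    std::vector<float> hB(size_t(ldb) * k, 2.0f);
    std::vector<float> hC(size_t(ldc) * n, 0.0f);

    float *dA, *dB, *dC;
    hipMalloc((void**)&dA, hA.size() * sizeof(float));
    hipMalloc((void**)&dB, hB.size() * sizeof(float));
    hipMalloc((void**)&dC, hC.size() * sizeof(float));
    hipMemcpy(dA, hA.data(), hA.size() * sizeof(float), hipMemcpyHostToDevice);
    hipMemcpy(dB, hB.data(), hB.size() * sizeof(float), hipMemcpyHostToDevice);
    hipMemcpy(dC, hC.data(), hC.size() * sizeof(float), hipMemcpyHostToDevice);

    hipblasHandle_t handle;
    hipblasCreate(&handle);

    // With transA = HIPBLAS_OP_N this computes
    // C := alpha*(A*B^T + B*A^T) + beta*C, updating the upper triangle of C.
    hipblasStatus_t status = hipblasSsyr2k(handle,
                                           HIPBLAS_FILL_MODE_UPPER,
                                           HIPBLAS_OP_N,
                                           n, k,
                                           &alpha, dA, lda, dB, ldb,
                                           &beta, dC, ldc);
    if(status != HIPBLAS_STATUS_SUCCESS)
        printf("hipblasSsyr2k failed with status %d\n", (int)status);

    hipMemcpy(hC.data(), dC, hC.size() * sizeof(float), hipMemcpyDeviceToHost);
    printf("C[0] = %f\n", hC[0]);

    hipblasDestroy(handle);
    hipFree(dA);
    hipFree(dB);
    hipFree(dC);
    return 0;
}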
hipBLAS-rocm-5.5.1/clients/include/testing_syr2k_batched.hpp000066400000000000000000000235611434647641600240060ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyr2kBatchedModel = ArgumentModel; inline void testname_syr2k_batched(const Arguments& arg, std::string& name) { hipblasSyr2kBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr2k_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyr2kBatchedFn = FORTRAN ? hipblasSyr2kBatched : hipblasSyr2kBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K)) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t B_size = size_t(ldb) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hB, arg, hipblas_client_never_set_nan, false, true); hipblas_init_vector(hC_host, arg, hipblas_client_never_set_nan); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyr2kBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, &h_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyr2kBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_syr2k( uplo, transA, N, K, h_alpha, hA[b], lda, hB[b], ldb, h_beta, hC_gold[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyr2kBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSyr2kBatchedModel{}.log_args(std::cout, arg, gpu_time_used, syr2k_gflop_count(N, K), syr2k_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
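The batched test above hands the library arrays of per-matrix device pointers via device_batch_vector::ptr_on_device(). The sketch below is not taken from the repository; assuming the public hipblasSsyr2kBatched entry point follows the same pointer-array convention, it shows how such an argument is typically built by hand: one device allocation per problem in the batch, plus a device-resident array holding those pointers. Data initialization and error handling are omitted to keep the sketch short.

// Standalone sketch of the batched pointer-array convention used by the tests.
#include <hip/hip_runtime.h>
#include <hipblas.h>
#include <vector>

int main()
{
    const int   n = 4, k = 3, lda = n, ldb = n, ldc = n, batch_count = 2;
    const float alpha = 1.0f, beta = 0.0f;

    // One device allocation per matrix in the batch.
    std::vector<float*> hdA(batch_count), hdB(batch_count), hdC(batch_count);
    for(int b = 0; b < batch_count; b++)
    {
        hipMalloc((void**)&hdA[b], sizeof(float) * size_t(lda) * k);
        hipMalloc((void**)&hdB[b], sizeof(float) * size_t(ldb) * k);
        hipMalloc((void**)&hdC[b], sizeof(float) * size_t(ldc) * n);
        // ... fill each matrix with hipMemcpy or a kernel as needed ...
    }

    // The tests pass a device-side array of device pointers, so copy the host
    // array of pointers to the device before the call.
    float **dA, **dB, **dC;
    hipMalloc((void**)&dA, sizeof(float*) * batch_count);
    hipMalloc((void**)&dB, sizeof(float*) * batch_count);
    hipMalloc((void**)&dC, sizeof(float*) * batch_count);
    hipMemcpy(dA, hdA.data(), sizeof(float*) * batch_count, hipMemcpyHostToDevice);
    hipMemcpy(dB, hdB.data(), sizeof(float*) * batch_count, hipMemcpyHostToDevice);
    hipMemcpy(dC, hdC.data(), sizeof(float*) * batch_count, hipMemcpyHostToDevice);

    hipblasHandle_t handle;
    hipblasCreate(&handle);

    // Each batch entry b computes C[b] := alpha*(A[b]*B[b]^T + B[b]*A[b]^T) + beta*C[b].
    hipblasSsyr2kBatched(handle,
                         HIPBLAS_FILL_MODE_UPPER,
                         HIPBLAS_OP_N,
                         n, k, &alpha,
                         dA, lda, dB, ldb,
                         &beta, dC, ldc, batch_count);

    hipblasDestroy(handle);
    for(int b = 0; b < batch_count; b++)
    {
        hipFree(hdA[b]);
        hipFree(hdB[b]);
        hipFree(hdC[b]);
    }
    hipFree(dA);
    hipFree(dB);
    hipFree(dC);
    return 0;
}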
hipBLAS-rocm-5.5.1/clients/include/testing_syr2k_strided_batched.hpp000066400000000000000000000272431434647641600255250ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyr2kStridedBatchedModel = ArgumentModel; inline void testname_syr2k_strided_batched(const Arguments& arg, std::string& name) { hipblasSyr2kStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr2k_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrk2StridedBatchedFn = FORTRAN ? hipblasSyr2kStridedBatched : hipblasSyr2kStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int K1 = (transA == HIPBLAS_OP_N ? K : N); hipblasStride stride_A = size_t(lda) * K1 * stride_scale; hipblasStride stride_B = size_t(ldb) * K1 * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t B_size = stride_B * batch_count; size_t C_size = stride_C * batch_count; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K)) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix( hA, arg, N, K1, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_matrix( hB, arg, N, K1, ldb, stride_B, batch_count, hipblas_client_never_set_nan, false, true); hipblas_init_matrix( hC_host, arg, N, N, ldc, stride_C, batch_count, hipblas_client_never_set_nan); hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrk2StridedBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyrk2StridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_syr2k(uplo, transA, N, K, h_alpha, hA.data() + b * stride_A, lda, hB.data() + b * stride_B, ldb, h_beta, hC_gold.data() + b * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrk2StridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, 
d_beta, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSyr2kStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, syr2k_gflop_count(N, K), syr2k_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syr_batched.hpp000066400000000000000000000176101434647641600235470ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrBatchedModel = ArgumentModel; inline void testname_syr_batched(const Arguments& arg, std::string& name) { hipblasSyrBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrBatchedFn = FORTRAN ? hipblasSyrBatched : hipblasSyrBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int lda = arg.lda; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; size_t A_size = size_t(lda) * N; T h_alpha = arg.get_alpha(); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || lda < N || lda < 1 || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasSyrBatchedFn( handle, uplo, N, nullptr, nullptr, incx, nullptr, lda, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error_host, hipblas_error_device; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hA_cpu(A_size, 1, batch_count); host_batch_vector hA_host(A_size, 1, batch_count); host_batch_vector hA_device(A_size, 1, batch_count); host_batch_vector hx(N, incx, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(N, incx, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hA_cpu.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrBatchedFn(handle, uplo, N, &h_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_host.transfer_from(dA)); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyrBatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), lda, batch_count)); CHECK_HIP_ERROR(hA_device.transfer_from(dA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_syr(uplo, N, h_alpha, hx[b], incx, hA_cpu[b], lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, lda, hA_cpu, hA_host); unit_check_general(N, N, batch_count, lda, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, lda, hA_cpu, hA_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, lda, hA_cpu, hA_device, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dA.transfer_from(hA)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrBatchedFn(handle, uplo, N, d_alpha, dx.ptr_on_device(), incx, dA.ptr_on_device(), lda, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSyrBatchedModel{}.log_args(std::cout, arg, gpu_time_used, syr_gflop_count(N), syr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syr_strided_batched.hpp000066400000000000000000000166701434647641600252720ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrStridedBatchedModel = ArgumentModel; inline void testname_syr_strided_batched(const Arguments& arg, std::string& name) { hipblasSyrStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syr_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrStridedBatchedFn = FORTRAN ? hipblasSyrStridedBatched : hipblasSyrStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); int N = arg.N; int incx = arg.incx; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; hipblasStride strideA = size_t(lda) * N * stride_scale; hipblasStride stridex = size_t(abs_incx) * N * stride_scale; size_t A_size = strideA * batch_count; size_t x_size = stridex * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || lda < N || lda < 1 || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasSyrStridedBatchedFn( handle, uplo, N, nullptr, nullptr, incx, stridex, nullptr, lda, strideA, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hA_cpu(A_size); host_vector hA_host(A_size); host_vector hA_device(A_size); host_vector hx(x_size); device_vector dA(A_size); device_vector dx(x_size); device_vector d_alpha(1); T h_alpha = arg.get_alpha(); double gpu_time_used, hipblas_error_host, hipblas_error_device; // Initial Data on CPU hipblas_init_matrix( hA, arg, N, N, lda, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_alpha_sets_nan, false, true); hA_cpu = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrStridedBatchedFn( handle, uplo, N, &h_alpha, dx, incx, stridex, dA, lda, strideA, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_host.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyrStridedBatchedFn( handle, uplo, N, d_alpha, dx, incx, stridex, dA, lda, strideA, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hA_device.data(), dA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_syr( uplo, N, h_alpha, hx.data() + b * stridex, incx, hA_cpu.data() + b * strideA, lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, lda, strideA, hA_cpu, hA_host); unit_check_general(N, N, batch_count, lda, strideA, hA_cpu, hA_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general( 'F', N, N, lda, strideA, hA_cpu.data(), hA_host.data(), batch_count); hipblas_error_device = norm_check_general( 'F', N, N, lda, strideA, hA_cpu.data(), hA_device.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrStridedBatchedFn( handle, uplo, N, d_alpha, dx, incx, stridex, dA, lda, strideA, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSyrStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, syr_gflop_count(N), syr_gbyte_count(N), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syrk.hpp000066400000000000000000000151471434647641600222530ustar00rootroot00000000000000/* 
************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrkModel = ArgumentModel; inline void testname_syrk(const Arguments& arg, std::string& name) { hipblasSyrkModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syrk(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrkFn = FORTRAN ? hipblasSyrk : hipblasSyrk; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldc = arg.ldc; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && lda < N) || (transA != HIPBLAS_OP_N && lda < K)) { return HIPBLAS_STATUS_INVALID_VALUE; } int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix(hA, arg, N, K1, lda, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix(hC_host, arg, N, N, ldc, 0, 1, hipblas_client_beta_sets_nan); hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasSyrkFn(handle, uplo, transA, N, K, &h_alpha, dA, lda, &h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasSyrkFn(handle, uplo, transA, N, K, d_alpha, dA, lda, d_beta, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_syrk(uplo, transA, N, K, h_alpha, hA, lda, h_beta, hC_gold, ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, ldc, hC_gold, hC_host); unit_check_general(N, N, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasSyrkFn(handle, uplo, transA, N, K, d_alpha, dA, lda, d_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSyrkModel{}.log_args(std::cout, arg, gpu_time_used, syrk_gflop_count(N, K), syrk_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syrk_batched.hpp000066400000000000000000000212001434647641600237100ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrkBatchedModel = ArgumentModel; inline void testname_syrk_batched(const Arguments& arg, std::string& name) { hipblasSyrkBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syrk_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrkBatchedFn = FORTRAN ? hipblasSyrkBatched : hipblasSyrkBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldc = arg.ldc; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && lda < N) || (transA != HIPBLAS_OP_N && lda < K) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hC_host, arg, hipblas_client_beta_sets_nan); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrkBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA.ptr_on_device(), lda, &h_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyrkBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_syrk(uplo, transA, N, K, h_alpha, hA[b], lda, h_beta, hC_gold[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrkBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, d_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSyrkBatchedModel{}.log_args(std::cout, arg, gpu_time_used, syrk_gflop_count(N, K), syrk_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syrk_strided_batched.hpp000066400000000000000000000246671434647641600254520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrkStridedBatchedModel = ArgumentModel; inline void testname_syrk_strided_batched(const Arguments& arg, std::string& name) { hipblasSyrkStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syrk_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrkStridedBatchedFn = FORTRAN ? hipblasSyrkStridedBatched : hipblasSyrkStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int K1 = (transA == HIPBLAS_OP_N ? K : N); hipblasStride stride_A = size_t(lda) * K1 * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t C_size = stride_C * batch_count; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && lda < N) || (transA != HIPBLAS_OP_N && lda < K) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix( hA, arg, N, K1, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hC_host, arg, N, N, ldc, stride_C, batch_count, hipblas_client_beta_sets_nan); hC_device = hC_host; hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrkStridedBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA, lda, stride_A, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyrkStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_syrk(uplo, transA, N, K, h_alpha, hA.data() + b * stride_A, lda, h_beta, hC_gold.data() + b * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrkStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, d_beta, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasSyrkStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, syrk_gflop_count(N, K), syrk_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
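// The syrk/syrk_batched/syrk_strided_batched tests above all validate against cblas_syrk.
// As a plain illustration of the operation being checked -- not code from the hipBLAS
// clients, and with the name syrk_reference_sketch chosen here only for illustration --
// a minimal column-major scalar loop for C := alpha * op(A) * op(A)^T + beta * C,
// updating only the uplo triangle of C, looks like this:
template <typename T>
void syrk_reference_sketch(hipblasFillMode_t  uplo,
                           hipblasOperation_t transA,
                           int                N,
                           int                K,
                           T                  alpha,
                           const T*           A,
                           int                lda,
                           T                  beta,
                           T*                 C,
                           int                ldc)
{
    for(int j = 0; j < N; j++)
    {
        // restrict the update to the triangle selected by uplo
        int i_first = (uplo == HIPBLAS_FILL_MODE_UPPER) ? 0 : j;
        int i_last  = (uplo == HIPBLAS_FILL_MODE_UPPER) ? j : N - 1;
        for(int i = i_first; i <= i_last; i++)
        {
            T dot = T(0);
            for(int k = 0; k < K; k++)
            {
                // op(A) is A (N x K) for HIPBLAS_OP_N, otherwise the transpose of a K x N matrix
                T a_ik = (transA == HIPBLAS_OP_N) ? A[i + size_t(k) * lda] : A[k + size_t(i) * lda];
                T a_jk = (transA == HIPBLAS_OP_N) ? A[j + size_t(k) * lda] : A[k + size_t(j) * lda];
                dot += a_ik * a_jk;
            }
            C[i + size_t(j) * ldc] = alpha * dot + beta * C[i + size_t(j) * ldc];
        }
    }
}
// (With B == A this is also the result the syrkx tests that follow expect, which is why
// their strided-batched variant notes that B must equal A to reuse syrk as a reference.)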
hipBLAS-rocm-5.5.1/clients/include/testing_syrkx.hpp000066400000000000000000000154511434647641600224410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrkxModel = ArgumentModel; inline void testname_syrkx(const Arguments& arg, std::string& name) { hipblasSyrkxModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syrkx(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrkxFn = FORTRAN ? hipblasSyrkx : hipblasSyrkx; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t trans = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (trans == HIPBLAS_OP_N && (lda < N || ldb < N)) || (trans != HIPBLAS_OP_N && (lda < K || ldb < K))) { return HIPBLAS_STATUS_INVALID_VALUE; } int K1 = (trans == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t B_size = size_t(lda) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU srand(1); hipblas_init(hA, N, K1, lda); hipblas_init(hB, N, K1, ldb); hipblas_init(hC_host, N, N, ldc); hC_device = hC_gold = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrkxFn( handle, uplo, trans, N, K, (T*)&h_alpha, dA, lda, dB, ldb, (T*)&h_beta, dC, ldc)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasSyrkxFn(handle, uplo, trans, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ syrkx_reference(uplo, trans, N, K, h_alpha, hA, lda, hB, ldb, h_beta, hC_gold, ldc); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, ldc, hC_gold, hC_host); unit_check_general(N, N, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = std::abs(norm_check_general('F', N, N, ldc, hC_gold, hC_host)); hipblas_error_device = std::abs(norm_check_general('F', N, N, ldc, hC_gold, hC_device)); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrkxFn( handle, uplo, trans, N, K, d_alpha, dA, lda, dB, ldb, d_beta, dC, ldc)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSyrkxModel{}.log_args(std::cout, arg, gpu_time_used, syrkx_gflop_count(N, K), syrkx_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_syrkx_batched.hpp000066400000000000000000000224421434647641600241110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrkxBatchedModel = ArgumentModel; inline void testname_syrkx_batched(const Arguments& arg, std::string& name) { hipblasSyrkxBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syrkx_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrkxBatchedFn = FORTRAN ? hipblasSyrkxBatched : hipblasSyrkxBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K)) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } int K1 = (transA == HIPBLAS_OP_N ? K : N); size_t A_size = size_t(lda) * K1; size_t B_size = size_t(ldb) * K1; size_t C_size = size_t(ldc) * N; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(B_size, 1, batch_count); host_batch_vector hC_host(C_size, 1, batch_count); host_batch_vector hC_device(C_size, 1, batch_count); host_batch_vector hC_gold(C_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dC(C_size, 1, batch_count); device_vector d_alpha(1); device_vector d_beta(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); hipblas_init(hA, true); hipblas_init(hB); hipblas_init(hC_host); hC_device.copy_from(hC_host); hC_gold.copy_from(hC_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dC.transfer_from(hC_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrkxBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, &h_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_host.transfer_from(dC)); CHECK_HIP_ERROR(dC.transfer_from(hC_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyrkxBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); CHECK_HIP_ERROR(hC_device.transfer_from(dC)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { syrkx_reference( uplo, transA, N, K, h_alpha, hA[b], lda, hB[b], ldb, h_beta, hC_gold[b], ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, hC_gold, hC_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', N, N, ldc, hC_gold, hC_host, batch_count); hipblas_error_device = norm_check_general('F', N, N, ldc, hC_gold, hC_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrkxBatchedFn(handle, uplo, transA, N, K, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, d_beta, dC.ptr_on_device(), ldc, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasSyrkxBatchedModel{}.log_args(std::cout, arg, gpu_time_used, syrkx_gflop_count(N, K), syrkx_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
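// The *_batched tests above hand the library arrays of per-batch device pointers via
// dA.ptr_on_device() / dB.ptr_on_device() / dC.ptr_on_device(). As a rough sketch of what
// such a helper provides -- illustrative only, assuming float data; build_pointer_array_sketch
// and its parameter names are not part of the hipBLAS clients -- the array itself lives in
// device memory and holds one device pointer per batch instance:
inline float** build_pointer_array_sketch(float* const* host_array_of_device_ptrs, int batch_count)
{
    float** device_array = nullptr;
    // space for batch_count pointers on the device
    hipMalloc((void**)&device_array, sizeof(float*) * batch_count);
    // copy the host-side list of device pointers into it
    hipMemcpy(device_array,
              host_array_of_device_ptrs,
              sizeof(float*) * batch_count,
              hipMemcpyHostToDevice);
    // error checking omitted; the returned pointer is what a *Batched API consumes
    return device_array;
}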
hipBLAS-rocm-5.5.1/clients/include/testing_syrkx_strided_batched.hpp000066400000000000000000000261201434647641600256240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasSyrkxStridedBatchedModel = ArgumentModel; inline void testname_syrkx_strided_batched(const Arguments& arg, std::string& name) { hipblasSyrkxStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_syrkx_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasSyrkxStridedBatchedFn = FORTRAN ? hipblasSyrkxStridedBatched : hipblasSyrkxStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int K = arg.K; int lda = arg.lda; int ldb = arg.ldb; int ldc = arg.ldc; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); T h_beta = arg.get_beta(); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(N < 0 || K < 0 || ldc < N || (transA == HIPBLAS_OP_N && (lda < N || ldb < N)) || (transA != HIPBLAS_OP_N && (lda < K || ldb < K)) || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } int K1 = transA == HIPBLAS_OP_N ? K : N; hipblasStride stride_A = size_t(lda) * K1 * stride_scale; hipblasStride stride_B = size_t(ldb) * K1 * stride_scale; hipblasStride stride_C = size_t(ldc) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t B_size = stride_B * batch_count; size_t C_size = stride_C * batch_count; // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(B_size); host_vector hC_host(C_size); host_vector hC_device(C_size); host_vector hC_gold(C_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dC(C_size); device_vector d_alpha(1); device_vector d_beta(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU srand(1); hipblas_init(hA, N, K1, lda, stride_A, batch_count); hipblas_init(hB, N, K1, ldb, stride_B, batch_count); hipblas_init(hC_host, N, N, ldc, stride_C, batch_count); hC_gold = hC_device = hC_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_host, sizeof(T) * C_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_beta, &h_beta, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasSyrkxStridedBatchedFn(handle, uplo, transA, N, K, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, &h_beta, dC, ldc, stride_C, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hC_host, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dC, hC_device, sizeof(T) * C_size, hipMemcpyDeviceToHost)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasSyrkxStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hC_device, dC, sizeof(T) * C_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int batch = 0; batch < batch_count; batch++) { // B must == A to use syrk as reference syrkx_reference(uplo, transA, N, K, h_alpha, hA.data() + batch * stride_A, lda, hB.data() + batch * stride_B, ldb, h_beta, hC_gold.data() + batch * stride_C, ldc); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_host); unit_check_general(N, N, batch_count, ldc, stride_C, hC_gold, hC_host); } if(arg.norm_check) { hipblas_error_host = std::abs( norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_host, batch_count)); hipblas_error_device = std::abs( norm_check_general('F', N, N, ldc, stride_C, hC_gold, hC_device, batch_count)); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasSyrkxStridedBatchedFn(handle, uplo, transA, N, K, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, d_beta, dC, ldc, stride_C, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; 
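// gpu_time_used now covers only the arg.iters timed iterations, in microseconds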
hipblasSyrkxStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, syrkx_gflop_count(N, K), syrkx_gbyte_count(N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tbmv.hpp000066400000000000000000000135051434647641600222270ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTbmvModel = ArgumentModel; inline void testname_tbmv(const Arguments& arg, std::string& name) { hipblasTbmvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tbmv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTbmvFn = FORTRAN ? hipblasTbmv : hipblasTbmv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int K = arg.K; int lda = arg.lda; int incx = arg.incx; int abs_incx = incx >= 0 ? incx : -incx; size_t A_size = size_t(lda) * M; size_t x_size = size_t(M) * abs_incx; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || !incx; if(invalid_size || !M) { hipblasStatus_t actual = hipblasTbmvFn(handle, uplo, transA, diag, M, K, nullptr, lda, nullptr, incx); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hx_cpu(x_size); host_vector hx_res(x_size); device_vector dA(A_size); device_vector dx(x_size); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix(hA, arg, A_size, 1, 1, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_never_set_nan, false, true); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hx_cpu = hx; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTbmvFn(handle, uplo, transA, diag, M, K, dA, lda, dx, incx)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx_res.data(), dx, sizeof(T) * x_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_tbmv(uplo, transA, diag, M, K, hA.data(), lda, hx_cpu.data(), incx); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, abs_incx, hx_cpu, hx_res); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, abs_incx, hx_cpu.data(), hx_res.data()); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTbmvFn(handle, uplo, transA, diag, M, K, dA, lda, dx, incx)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTbmvModel{}.log_args(std::cout, arg, gpu_time_used, tbmv_gflop_count(M, K), tbmv_gbyte_count(M, K), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tbmv_batched.hpp000066400000000000000000000160031434647641600236750ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTbmvBatchedModel = ArgumentModel; inline void testname_tbmv_batched(const Arguments& arg, std::string& name) { hipblasTbmvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tbmv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTbmvBatchedFn = FORTRAN ? hipblasTbmvBatched : hipblasTbmvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int K = arg.K; int lda = arg.lda; int incx = arg.incx; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; size_t A_size = size_t(lda) * M; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTbmvBatchedFn( handle, uplo, transA, diag, M, K, nullptr, lda, nullptr, incx, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error; // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hx_cpu(M, incx, batch_count); host_batch_vector hx_res(M, incx, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(M, incx, batch_count); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_never_set_nan, false, true); hx_cpu.copy_from(hx); CHECK_HIP_ERROR(dx.transfer_from(hx)); CHECK_HIP_ERROR(dA.transfer_from(hA)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTbmvBatchedFn(handle, uplo, transA, diag, M, K, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, batch_count)); CHECK_HIP_ERROR(hx_res.transfer_from(dx)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_tbmv(uplo, transA, diag, M, K, hA[b], lda, hx_cpu[b], incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incx, hx_cpu, hx_res); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, abs_incx, hx_cpu, hx_res, batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(dx.transfer_from(hx)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, 
&stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTbmvBatchedFn(handle, uplo, transA, diag, M, K, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTbmvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, tbmv_gflop_count(M, K), tbmv_gbyte_count(M, K), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tbmv_strided_batched.hpp000066400000000000000000000211671434647641600254220ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTbmvStridedBatchedModel = ArgumentModel; inline void testname_tbmv_strided_batched(const Arguments& arg, std::string& name) { hipblasTbmvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tbmv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTbmvStridedBatchedFn = FORTRAN ? hipblasTbmvStridedBatched : hipblasTbmvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int K = arg.K; int lda = arg.lda; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; hipblasStride stride_A = size_t(lda) * M * stride_scale; hipblasStride stride_x = size_t(M) * abs_incx * stride_scale; size_t A_size = stride_A * batch_count; size_t x_size = stride_x * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTbmvStridedBatchedFn(handle, uplo, transA, diag, M, K, nullptr, lda, stride_A, nullptr, incx, stride_x, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? 
HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hx_cpu(x_size); host_vector hx_res(x_size); device_vector dA(A_size); device_vector dx(x_size); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix( hA, arg, M, M, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, M, abs_incx, stride_x, batch_count, hipblas_client_never_set_nan, false, true); hx_cpu = hx; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTbmvStridedBatchedFn( handle, uplo, transA, diag, M, K, dA, lda, stride_A, dx, incx, stride_x, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hx_res.data(), dx, sizeof(T) * x_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_tbmv(uplo, transA, diag, M, K, hA.data() + b * stride_A, lda, hx_cpu.data() + b * stride_x, incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incx, stride_x, hx_cpu, hx_res); } if(arg.norm_check) { hipblas_error = norm_check_general( 'F', 1, M, abs_incx, stride_x, hx_cpu.data(), hx_res.data(), batch_count); } } if(arg.timing) { CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTbmvStridedBatchedFn(handle, uplo, transA, diag, M, K, dA, lda, stride_A, dx, incx, stride_x, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTbmvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, tbmv_gflop_count(M, K), tbmv_gbyte_count(M, K), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tbsv.hpp000066400000000000000000000135271434647641600222410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTbsvModel = ArgumentModel; inline void testname_tbsv(const Arguments& arg, std::string& name) { hipblasTbsvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tbsv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTbsvFn = FORTRAN ? hipblasTbsv : hipblasTbsv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int M = arg.M; int K = arg.K; int incx = arg.incx; int lda = arg.lda; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || !incx; if(invalid_size || !M) { hipblasStatus_t actual = hipblasTbsvFn(handle, uplo, transA, diag, M, K, nullptr, lda, nullptr, incx); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } int abs_incx = incx < 0 ? -incx : incx; size_t size_A = size_t(M) * M; size_t size_AB = size_t(lda) * M; size_t size_x = abs_incx * size_t(M); // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(size_A); host_vector hAB(size_AB); host_vector AAT(size_A); host_vector hb(size_x); host_vector hx(size_x); host_vector hx_or_b_1(size_x); device_vector dAB(size_AB); device_vector dx_or_b(size_x); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix(hA, arg, size_A, 1, 1, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_never_set_nan, false, true); hb = hx; banded_matrix_setup(uplo == HIPBLAS_FILL_MODE_UPPER, (T*)hA, M, M, K); prepare_triangular_solve((T*)hA, M, (T*)AAT, M, arg.uplo); if(diag == HIPBLAS_DIAG_UNIT) { make_unit_diagonal(uplo, (T*)hA, M, M); } regular_to_banded(uplo == HIPBLAS_FILL_MODE_UPPER, (T*)hA, M, (T*)hAB, lda, M, K); CHECK_HIP_ERROR(hipMemcpy(dAB, hAB.data(), sizeof(T) * size_AB, hipMemcpyHostToDevice)); cblas_tbmv(uplo, transA, diag, M, K, hAB, lda, hb, incx); hx_or_b_1 = hb; // copy data from CPU to device CHECK_HIP_ERROR( hipMemcpy(dx_or_b, hx_or_b_1.data(), sizeof(T) * size_x, hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR( hipblasTbsvFn(handle, uplo, transA, diag, M, K, dAB, lda, dx_or_b, incx)); // copy output from device to CPU CHECK_HIP_ERROR( hipMemcpy(hx_or_b_1.data(), dx_or_b, sizeof(T) * size_x, hipMemcpyDeviceToHost)); // Calculating error hipblas_error = std::abs(vector_norm_1(M, abs_incx, hx.data(), hx_or_b_1.data())); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * M; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasTbsvFn(handle, uplo, transA, diag, M, K, dAB, lda, dx_or_b, incx)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTbsvModel{}.log_args(std::cout, arg, gpu_time_used, tbsv_gflop_count(M, K), tbsv_gbyte_count(M, K), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tbsv_batched.hpp000066400000000000000000000171211434647641600237050ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTbsvBatchedModel = ArgumentModel; inline void testname_tbsv_batched(const Arguments& arg, std::string& name) { hipblasTbsvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tbsv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTbsvBatchedFn = FORTRAN ? hipblasTbsvBatched : hipblasTbsvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int M = arg.M; int K = arg.K; int incx = arg.incx; int lda = arg.lda; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; size_t size_A = size_t(M) * M; size_t size_AB = size_t(lda) * M; size_t size_x = abs_incx * size_t(M); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTbsvBatchedFn( handle, uplo, transA, diag, M, K, nullptr, lda, nullptr, incx, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
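// Illustrative sketch (an assumption about the client helpers, not original
// code): the *Batched APIs take an array of per-problem device pointers instead
// of one contiguous allocation. host_batch_vector / device_batch_vector are
// assumed to manage that double indirection, with ptr_on_device() returning the
// device-resident pointer array hipblasTbsvBatchedFn consumes. Conceptually:
//
//     std::vector<T*> hptr(batch_count);              // one device buffer per batch
//     for(int b = 0; b < batch_count; b++)
//         hipMalloc(&hptr[b], size_AB * sizeof(T));
//     T** dptr;                                        // device array of those pointers
//     hipMalloc(&dptr, batch_count * sizeof(T*));
//     hipMemcpy(dptr, hptr.data(), batch_count * sizeof(T*), hipMemcpyHostToDevice);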
hK is in CPU (host) memory host_batch_vector hA(size_A, 1, batch_count); host_batch_vector hAB(size_AB, 1, batch_count); host_batch_vector AAT(size_A, 1, batch_count); host_batch_vector hb(M, incx, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hx_or_b(M, incx, batch_count); device_batch_vector dAB(size_AB, 1, batch_count); device_batch_vector dx_or_b(M, incx, batch_count); CHECK_HIP_ERROR(dAB.memcheck()); CHECK_HIP_ERROR(dx_or_b.memcheck()); double gpu_time_used, hipblas_error, cumulative_hipblas_error = 0; // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_alpha_sets_nan, false, true); hb.copy_from(hx); for(int b = 0; b < batch_count; b++) { banded_matrix_setup(uplo == HIPBLAS_FILL_MODE_UPPER, (T*)hA[b], M, M, K); prepare_triangular_solve((T*)hA[b], M, (T*)AAT[b], M, arg.uplo); if(diag == HIPBLAS_DIAG_UNIT) { make_unit_diagonal(uplo, (T*)hA[b], M, M); } regular_to_banded(uplo == HIPBLAS_FILL_MODE_UPPER, (T*)hA[b], M, (T*)hAB[b], lda, M, K); // Calculate hb = hA*hx; cblas_tbmv(uplo, transA, diag, M, K, hAB[b], lda, hb[b], incx); } hx_or_b.copy_from(hb); CHECK_HIP_ERROR(dAB.transfer_from(hAB)); CHECK_HIP_ERROR(dx_or_b.transfer_from(hx_or_b)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasTbsvBatchedFn(handle, uplo, transA, diag, M, K, dAB.ptr_on_device(), lda, dx_or_b.ptr_on_device(), incx, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hx_or_b.transfer_from(dx_or_b)); // Calculating error // For norm_check/bench, currently taking the cumulative sum of errors over all batches for(int b = 0; b < batch_count; b++) { hipblas_error = std::abs(vector_norm_1(M, abs_incx, hx[b], hx_or_b[b])); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * M; unit_check_error(hipblas_error, tolerance); } cumulative_hipblas_error += hipblas_error; } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTbsvBatchedFn(handle, uplo, transA, diag, M, K, dAB.ptr_on_device(), lda, dx_or_b.ptr_on_device(), incx, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTbsvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, tbsv_gflop_count(M, K), tbsv_gbyte_count(M, K), cumulative_hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tbsv_strided_batched.hpp000066400000000000000000000232601434647641600254240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTbsvStridedBatchedModel = ArgumentModel; inline void testname_tbsv_strided_batched(const Arguments& arg, std::string& name) { hipblasTbsvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tbsv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTbsvStridedBatchedFn = FORTRAN ? hipblasTbsvStridedBatched : hipblasTbsvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int M = arg.M; int K = arg.K; int incx = arg.incx; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; hipblasStride strideA = size_t(M) * M; hipblasStride strideAB = size_t(M) * lda * stride_scale; hipblasStride stridex = size_t(abs_incx) * M * stride_scale; size_t size_A = strideA * batch_count; size_t size_AB = strideAB * batch_count; size_t size_x = stridex * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || K < 0 || lda < K + 1 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTbsvStridedBatchedFn(handle, uplo, transA, diag, M, K, nullptr, lda, strideA, nullptr, incx, stridex, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
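// Illustrative note (layout implied by the strided addressing used below): the
// strided-batched variant keeps every problem instance in one allocation and
// offsets batch b by a fixed stride, so with 0-based indices
//
//     element (i, j) of banded matrix b:  dAB[b * strideAB + i + size_t(j) * lda]
//     element i of vector b (incx > 0):   dx [b * stridex  + i * incx]
//
// arg.stride_scale >= 1 merely pads strideAB / stridex beyond the minimum
// footprint so the test also exercises non-contiguous spacing between batches.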
hK is in CPU (host) memory host_vector hA(size_A); host_vector hAB(size_AB); host_vector AAT(size_A); host_vector hb(size_x); host_vector hx(size_x); host_vector hx_or_b_1(size_x); device_vector dAB(size_AB); device_vector dx_or_b(size_x); double gpu_time_used, hipblas_error, cumulative_hipblas_error = 0; // Initial Data on CPU hipblas_init_matrix(hA, arg, M, M, M, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, M, abs_incx, stridex, batch_count, hipblas_client_never_set_nan, false, true); hb = hx; for(int b = 0; b < batch_count; b++) { T* hAbat = hA.data() + b * strideA; T* hABbat = hAB.data() + b * strideAB; T* AATbat = AAT.data() + b * strideA; T* hbbat = hb.data() + b * stridex; banded_matrix_setup(uplo == HIPBLAS_FILL_MODE_UPPER, hAbat, M, M, K); prepare_triangular_solve(hAbat, M, AATbat, M, arg.uplo); if(diag == HIPBLAS_DIAG_UNIT) { make_unit_diagonal(uplo, hAbat, M, M); } regular_to_banded(uplo == HIPBLAS_FILL_MODE_UPPER, hAbat, M, hABbat, lda, M, K); // Calculate hb = hA*hx; cblas_tbmv(uplo, transA, diag, M, K, hABbat, lda, hbbat, incx); } hx_or_b_1 = hb; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dAB, hAB.data(), sizeof(T) * size_AB, hipMemcpyHostToDevice)); CHECK_HIP_ERROR( hipMemcpy(dx_or_b, hx_or_b_1.data(), sizeof(T) * size_x, hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasTbsvStridedBatchedFn(handle, uplo, transA, diag, M, K, dAB, lda, strideAB, dx_or_b, incx, stridex, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR( hipMemcpy(hx_or_b_1.data(), dx_or_b, sizeof(T) * size_x, hipMemcpyDeviceToHost)); // Calculating error // For norm_check/bench, currently taking the cumulative sum of errors over all batches for(int b = 0; b < batch_count; b++) { hipblas_error = std::abs(vector_norm_1( M, abs_incx, hx.data() + b * stridex, hx_or_b_1.data() + b * stridex)); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * M; unit_check_error(hipblas_error, tolerance); } cumulative_hipblas_error += hipblas_error; } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTbsvStridedBatchedFn(handle, uplo, transA, diag, M, K, dAB, lda, strideAB, dx_or_b, incx, stridex, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTbsvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, tbsv_gflop_count(M, K), tbsv_gbyte_count(M, K), cumulative_hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tpmv.hpp000066400000000000000000000130371434647641600222450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTpmvModel = ArgumentModel; inline void testname_tpmv(const Arguments& arg, std::string& name) { hipblasTpmvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tpmv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTpmvFn = FORTRAN ? hipblasTpmv : hipblasTpmv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int incx = arg.incx; int abs_incx = incx >= 0 ? incx : -incx; size_t x_size = size_t(M) * abs_incx; size_t A_size = size_t(M) * (M + 1) / 2; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || !incx; if(invalid_size || !M) { hipblasStatus_t actual = hipblasTpmvFn(handle, uplo, transA, diag, M, nullptr, nullptr, incx); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
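// Illustrative sketch (assumes the standard BLAS "TP" packing, which matches the
// A_size = M * (M + 1) / 2 allocation below): a packed triangular matrix keeps
// only the referenced triangle, column by column, so with 0-based column-major
// indexing
//
//     upper:  AP[i + j * (j + 1) / 2]             for  i <= j
//     lower:  AP[i + j * (2 * M - j - 1) / 2]     for  i >= j
//
// e.g. for M = 3 the upper packing order is A(0,0), A(0,1), A(1,1), A(0,2),
// A(1,2), A(2,2).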
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hres(x_size); device_vector dA(A_size); device_vector dx(x_size); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix(hA, arg, A_size, 1, 1, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_never_set_nan, false, true); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hres = hx; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTpmvFn(handle, uplo, transA, diag, M, dA, dx, incx)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hres.data(), dx, sizeof(T) * x_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_tpmv(uplo, transA, diag, M, hA.data(), hx.data(), incx); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, abs_incx, hx, hres); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, abs_incx, hx.data(), hres.data()); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTpmvFn(handle, uplo, transA, diag, M, dA, dx, incx)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTpmvModel{}.log_args(std::cout, arg, gpu_time_used, tpmv_gflop_count(M), tpmv_gbyte_count(M), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tpmv_batched.hpp000066400000000000000000000151141434647641600237150ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTpmvBatchedModel = ArgumentModel; inline void testname_tpmv_batched(const Arguments& arg, std::string& name) { hipblasTpmvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tpmv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTpmvBatchedFn = FORTRAN ? hipblasTpmvBatched : hipblasTpmvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int incx = arg.incx; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; size_t A_size = size_t(M) * (M + 1) / 2; hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTpmvBatchedFn( handle, uplo, transA, diag, M, nullptr, nullptr, incx, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error; // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hx_res(M, incx, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(M, incx, batch_count); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_never_set_nan, false, true); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTpmvBatchedFn(handle, uplo, transA, diag, M, dA.ptr_on_device(), dx.ptr_on_device(), incx, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hx_res.transfer_from(dx)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_tpmv(uplo, transA, diag, M, hA[b], hx[b], incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incx, hx, hx_res); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, abs_incx, hx, hx_res, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTpmvBatchedFn(handle, uplo, transA, diag, M, dA.ptr_on_device(), dx.ptr_on_device(), incx, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTpmvBatchedModel{}.log_args(std::cout, arg, 
gpu_time_used, tpmv_gflop_count(M), tpmv_gbyte_count(M), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tpmv_strided_batched.hpp000066400000000000000000000146111434647641600254340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTpmvStridedBatchedModel = ArgumentModel; inline void testname_tpmv_strided_batched(const Arguments& arg, std::string& name) { hipblasTpmvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tpmv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTpmvStridedBatchedFn = FORTRAN ? hipblasTpmvStridedBatched : hipblasTpmvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; size_t dim_A = size_t(M) * (M + 1) / 2; hipblasStride stride_A = dim_A * stride_scale; hipblasStride stride_x = size_t(M) * abs_incx * stride_scale; size_t A_size = stride_A * batch_count; size_t X_size = stride_x * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTpmvStridedBatchedFn( handle, uplo, transA, diag, M, nullptr, stride_A, nullptr, incx, stride_x, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
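// Illustrative note on the benchmark loop used by all of these tests (the
// normalization inside log_args() is an assumption): the first arg.cold_iters
// launches only warm up the device and are not timed; the stopwatch starts at
// iteration index arg.cold_iters, and log_args() is assumed to divide the
// measured span by arg.iters before reporting GFLOP/s and GB/s:
//
//     int runs = arg.cold_iters + arg.iters;
//     for(int iter = 0; iter < runs; iter++)
//     {
//         if(iter == arg.cold_iters)
//             gpu_time_used = get_time_us_sync(stream);   // start after warm-up
//         // ... timed hipBLAS call ...
//     }
//     gpu_time_used = get_time_us_sync(stream) - gpu_time_used;   // microseconds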
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hres(X_size); device_vector dA(A_size); device_vector dx(X_size); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix( hA, arg, dim_A, 1, 1, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, M, abs_incx, stride_x, batch_count, hipblas_client_never_set_nan, false, true); hres = hx; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTpmvStridedBatchedFn( handle, uplo, transA, diag, M, dA, stride_A, dx, incx, stride_x, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hres.data(), dx, sizeof(T) * X_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_tpmv( uplo, transA, diag, M, hA.data() + b * stride_A, hx.data() + b * stride_x, incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incx, stride_x, hx, hres); } if(arg.norm_check) { hipblas_error = norm_check_general( 'F', 1, M, abs_incx, stride_x, hx.data(), hres.data(), batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTpmvStridedBatchedFn( handle, uplo, transA, diag, M, dA, stride_A, dx, incx, stride_x, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTpmvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, tpmv_gflop_count(M), tpmv_gbyte_count(M), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tpsv.hpp000066400000000000000000000162401434647641600222520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTpsvModel = ArgumentModel; inline void testname_tpsv(const Arguments& arg, std::string& name) { hipblasTpsvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tpsv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTpsvFn = FORTRAN ? hipblasTpsv : hipblasTpsv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int incx = arg.incx; int abs_incx = incx < 0 ? -incx : incx; size_t size_A = size_t(N) * N; size_t size_AP = size_t(N) * (N + 1) / 2; size_t size_x = abs_incx * size_t(N); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx; if(invalid_size || !N) { hipblasStatus_t actual = hipblasTpsvFn(handle, uplo, transA, diag, N, nullptr, nullptr, incx); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(size_A); host_vector hAP(size_AP); host_vector AAT(size_A); host_vector hb(size_x); host_vector hx(size_x); host_vector hx_or_b_1(size_x); host_vector hx_or_b_2(size_x); host_vector cpu_x_or_b(size_x); device_vector dAP(size_AP); device_vector dx_or_b(size_x); double gpu_time_used, hipblas_error; // Initial Data on CPU // srand(1); // hipblas_init(hA, N, N, 1); // hipblas_init(hx, 1, N, abs_incx); hipblas_init_matrix(hA, arg, size_A, 1, 1, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector( hx, arg, N, abs_incx, 0, 1, hipblas_client_never_set_nan, false, false); //true); hb = hx; // calculate AAT = hA * hA ^ T cblas_gemm(HIPBLAS_OP_N, HIPBLAS_OP_T, N, N, N, (T)1.0, hA.data(), N, hA.data(), N, (T)0.0, AAT.data(), N); // copy AAT into hA, make hA strictly diagonal dominant, and therefore SPD for(int i = 0; i < N; i++) { T t = 0.0; for(int j = 0; j < N; j++) { hA[i + j * N] = AAT[i + j * N]; t += std::abs(AAT[i + j * N]); } hA[i + i * N] = t; } // calculate Cholesky factorization of SPD matrix hA cblas_potrf(arg.uplo, N, hA.data(), N); // make hA unit diagonal if diag == rocblas_diagonal_unit if(arg.diag == 'U' || arg.diag == 'u') { if('L' == arg.uplo || 'l' == arg.uplo) for(int i = 0; i < N; i++) { T diag = hA[i + i * N]; for(int j = 0; j <= i; j++) hA[i + j * N] = hA[i + j * N] / diag; } else for(int j = 0; j < N; j++) { T diag = hA[j + j * N]; for(int i = 0; i <= j; i++) hA[i + j * N] = hA[i + j * N] / diag; } } // Calculate hb = hA*hx; cblas_trmv(uplo, transA, diag, N, hA.data(), N, hb.data(), incx); cpu_x_or_b = hb; // cpuXorB <- B hx_or_b_1 = hb; hx_or_b_2 = hb; regular_to_packed(uplo == HIPBLAS_FILL_MODE_UPPER, (T*)hA, (T*)hAP, N); // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dAP, hAP.data(), sizeof(T) * size_AP, hipMemcpyHostToDevice)); CHECK_HIP_ERROR( hipMemcpy(dx_or_b, hx_or_b_1.data(), sizeof(T) * 
size_x, hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasTpsvFn(handle, uplo, transA, diag, N, dAP, dx_or_b, incx)); // copy output from device to CPU CHECK_HIP_ERROR( hipMemcpy(hx_or_b_1.data(), dx_or_b, sizeof(T) * size_x, hipMemcpyDeviceToHost)); // Calculating error hipblas_error = std::abs(vector_norm_1(N, abs_incx, hx.data(), hx_or_b_1.data())); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * N; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTpsvFn(handle, uplo, transA, diag, N, dAP, dx_or_b, incx)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTpsvModel{}.log_args(std::cout, arg, gpu_time_used, tpsv_gflop_count(N), tpsv_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tpsv_batched.hpp000066400000000000000000000214231434647641600237230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTpsvBatchedModel = ArgumentModel; inline void testname_tpsv_batched(const Arguments& arg, std::string& name) { hipblasTpsvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tpsv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTpsvBatchedFn = FORTRAN ? hipblasTpsvBatched : hipblasTpsvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int incx = arg.incx; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? 
-incx : incx; size_t size_A = size_t(N) * N; size_t size_AP = size_t(N) * (N + 1) / 2; size_t size_x = abs_incx * size_t(N); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasTpsvBatchedFn( handle, uplo, transA, diag, N, nullptr, nullptr, incx, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(size_A, 1, batch_count); host_batch_vector hAP(size_AP, 1, batch_count); host_batch_vector AAT(size_A, 1, batch_count); host_batch_vector hb(N, incx, batch_count); host_batch_vector hx(N, incx, batch_count); host_batch_vector hx_or_b_1(N, incx, batch_count); host_batch_vector hx_or_b_2(N, incx, batch_count); host_batch_vector cpu_x_or_b(N, incx, batch_count); device_batch_vector dAP(size_AP, 1, batch_count); device_batch_vector dx_or_b(N, incx, batch_count); CHECK_HIP_ERROR(dAP.memcheck()); CHECK_HIP_ERROR(dx_or_b.memcheck()); double gpu_time_used, hipblas_error, cumulative_hipblas_error = 0; // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_never_set_nan, false, true); hb.copy_from(hx); for(int b = 0; b < batch_count; b++) { // calculate AAT = hA * hA ^ T cblas_gemm(HIPBLAS_OP_N, HIPBLAS_OP_T, N, N, N, (T)1.0, (T*)hA[b], N, (T*)hA[b], N, (T)0.0, (T*)AAT[b], N); // copy AAT into hA, make hA strictly diagonal dominant, and therefore SPD for(int i = 0; i < N; i++) { T t = 0.0; for(int j = 0; j < N; j++) { hA[b][i + j * N] = AAT[b][i + j * N]; t += std::abs(AAT[b][i + j * N]); } hA[b][i + i * N] = t; } // calculate Cholesky factorization of SPD matrix hA cblas_potrf(arg.uplo, N, hA[b], N); // make hA unit diagonal if diag == rocblas_diagonal_unit if(arg.diag == 'U' || arg.diag == 'u') { if('L' == arg.uplo || 'l' == arg.uplo) for(int i = 0; i < N; i++) { T diag = hA[b][i + i * N]; for(int j = 0; j <= i; j++) hA[b][i + j * N] = hA[b][i + j * N] / diag; } else for(int j = 0; j < N; j++) { T diag = hA[b][j + j * N]; for(int i = 0; i <= j; i++) hA[b][i + j * N] = hA[b][i + j * N] / diag; } } // Calculate hb = hA*hx; cblas_trmv(uplo, transA, diag, N, hA[b], N, hb[b], incx); regular_to_packed(uplo == HIPBLAS_FILL_MODE_UPPER, (T*)hA[b], (T*)hAP[b], N); } cpu_x_or_b.copy_from(hb); hx_or_b_1.copy_from(hb); hx_or_b_2.copy_from(hb); CHECK_HIP_ERROR(dAP.transfer_from(hAP)); CHECK_HIP_ERROR(dx_or_b.transfer_from(hx_or_b_1)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasTpsvBatchedFn(handle, uplo, transA, diag, N, dAP.ptr_on_device(), dx_or_b.ptr_on_device(), incx, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hx_or_b_1.transfer_from(dx_or_b)); // Calculating error // For norm_check/bench, currently taking the cumulative sum of errors over all batches for(int b = 0; b < batch_count; b++) { hipblas_error = std::abs(vector_norm_1(N, abs_incx, hx[b], hx_or_b_1[b])); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * N; unit_check_error(hipblas_error, tolerance); } cumulative_hipblas_error += hipblas_error; } } if(arg.timing) { hipStream_t 
stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTpsvBatchedFn(handle, uplo, transA, diag, N, dAP.ptr_on_device(), dx_or_b.ptr_on_device(), incx, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTpsvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, tpsv_gflop_count(N), tpsv_gbyte_count(N), cumulative_hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_tpsv_strided_batched.hpp000066400000000000000000000203541434647641600254430ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTpsvStridedBatchedModel = ArgumentModel; inline void testname_tpsv_strided_batched(const Arguments& arg, std::string& name) { hipblasTpsvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_tpsv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTpsvStridedBatchedFn = FORTRAN ? hipblasTpsvStridedBatched : hipblasTpsvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int N = arg.N; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int dim_AP = N * (N + 1) / 2; int abs_incx = incx < 0 ? 
-incx : incx; hipblasStride strideA = N * N; // only for test setup hipblasStride strideAP = dim_AP * stride_scale; hipblasStride stridex = abs_incx * N * stride_scale; size_t size_A = strideA * batch_count; size_t size_AP = strideAP * batch_count; size_t size_x = stridex * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = N < 0 || !incx || batch_count < 0; if(invalid_size || !N || !batch_count) { hipblasStatus_t actual = hipblasTpsvStridedBatchedFn( handle, uplo, transA, diag, N, nullptr, strideAP, nullptr, incx, stridex, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(size_A); host_vector hAP(size_AP); host_vector AAT(size_A); host_vector hb(size_x); host_vector hx(size_x); host_vector hx_or_b_1(size_x); host_vector hx_or_b_2(size_x); host_vector cpu_x_or_b(size_x); device_vector dAP(size_AP); device_vector dx_or_b(size_x); double gpu_time_used, hipblas_error, cumulative_hipblas_error = 0; // Initial Data on CPU hipblas_init_matrix(hA, arg, N, N, N, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, N, abs_incx, stridex, batch_count, hipblas_client_never_set_nan, false, true); hb = hx; for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; T* hAPb = hAP.data() + b * strideAP; T* AATb = AAT.data() + b * strideA; T* hbb = hb.data() + b * stridex; // calculate AAT = hA * hA ^ T cblas_gemm(HIPBLAS_OP_N, HIPBLAS_OP_T, N, N, N, (T)1.0, hAb, N, hAb, N, (T)0.0, AATb, N); // copy AAT into hA, make hA strictly diagonal dominant, and therefore SPD for(int i = 0; i < N; i++) { T t = 0.0; for(int j = 0; j < N; j++) { hAb[i + j * N] = AATb[i + j * N]; t += std::abs(AATb[i + j * N]); } hAb[i + i * N] = t; } // calculate Cholesky factorization of SPD matrix hA cblas_potrf(arg.uplo, N, hAb, N); // make hA unit diagonal if diag == rocblas_diagonal_unit if(arg.diag == 'U' || arg.diag == 'u') { if('L' == arg.uplo || 'l' == arg.uplo) for(int i = 0; i < N; i++) { T diag = hAb[i + i * N]; for(int j = 0; j <= i; j++) hAb[i + j * N] = hAb[i + j * N] / diag; } else for(int j = 0; j < N; j++) { T diag = hAb[j + j * N]; for(int i = 0; i <= j; i++) hAb[i + j * N] = hA[b + j * N] / diag; } } // Calculate hb = hA*hx; cblas_trmv(uplo, transA, diag, N, hAb, N, hbb, incx); regular_to_packed(uplo == HIPBLAS_FILL_MODE_UPPER, (T*)hAb, (T*)hAPb, N); } cpu_x_or_b = hb; // cpuXorB <- B hx_or_b_1 = hb; hx_or_b_2 = hb; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dAP, hAP.data(), sizeof(T) * size_AP, hipMemcpyHostToDevice)); CHECK_HIP_ERROR( hipMemcpy(dx_or_b, hx_or_b_1.data(), sizeof(T) * size_x, hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasTpsvStridedBatchedFn( handle, uplo, transA, diag, N, dAP, strideAP, dx_or_b, incx, stridex, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR( hipMemcpy(hx_or_b_1.data(), dx_or_b, sizeof(T) * size_x, hipMemcpyDeviceToHost)); // Calculating error // For norm_check/bench, currently taking the cumulative sum of errors over all batches for(int b = 0; b < batch_count; b++) { hipblas_error = std::abs(vector_norm_1( N, abs_incx, 
hx.data() + b * stridex, hx_or_b_1.data() + b * stridex)); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * N; unit_check_error(hipblas_error, tolerance); } cumulative_hipblas_error += hipblas_error; } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTpsvStridedBatchedFn( handle, uplo, transA, diag, N, dAP, strideAP, dx_or_b, incx, stridex, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTpsvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, tpsv_gflop_count(N), tpsv_gbyte_count(N), cumulative_hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trmm.hpp000066400000000000000000000146511434647641600222410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrmmModel = ArgumentModel; inline void testname_trmm(const Arguments& arg, std::string& name) { hipblasTrmmModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trmm(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrmmFn = FORTRAN ? hipblasTrmm : hipblasTrmm; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; T h_alpha = arg.get_alpha(); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || lda < K || ldb < M) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
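// Illustrative note (a condensed view of the flow below): the test runs the same
// TRMM twice to cover both scalar pointer modes, once with alpha read from host
// memory and once from device memory, restoring dB in between so both runs see
// identical input, and then checks both results against the cblas_trmm host
// reference:
//
//     hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST);
//     hipblasTrmmFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA, lda, dB, ldb);
//     // ... reload dB with the original B, then ...
//     hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE);
//     hipblasTrmmFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, dB, ldb);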
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB_host(B_size); host_vector hB_device(B_size); host_vector hB_gold(B_size); device_vector dA(A_size); device_vector dB(B_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix(hA, arg, K, K, lda, 0, 1, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix(hB_host, arg, M, N, ldb, 0, 1, hipblas_client_alpha_sets_nan, false, true); hB_device = hB_host; hB_gold = hB_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_host, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasTrmmFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA, lda, dB, ldb)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hB_host, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_device, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasTrmmFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, dB, ldb)); CHECK_HIP_ERROR(hipMemcpy(hB_device, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_trmm(side, uplo, transA, diag, M, N, h_alpha, hA, lda, hB_gold, ldb); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, ldb, hB_gold, hB_host); unit_check_general(M, N, ldb, hB_gold, hB_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldb, hB_gold, hB_host); hipblas_error_device = norm_check_general('F', M, N, ldb, hB_gold, hB_device); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasTrmmFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, dB, ldb)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrmmModel{}.log_args(std::cout, arg, gpu_time_used, trmm_gflop_count(M, N, K), trmm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trmm_batched.hpp000066400000000000000000000221671434647641600237140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrmmBatchedModel = ArgumentModel; inline void testname_trmm_batched(const Arguments& arg, std::string& name) { hipblasTrmmBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trmm_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrmmBatchedFn = FORTRAN ? hipblasTrmmBatched : hipblasTrmmBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; int K = (side == HIPBLAS_SIDE_LEFT ? 
M : N); size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(M < 0 || N < 0 || lda < K || ldb < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } else if(batch_count == 0) { return HIPBLAS_STATUS_SUCCESS; } double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // host arrays host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB_host(B_size, 1, batch_count); host_batch_vector hB_device(B_size, 1, batch_count); host_batch_vector hB_gold(B_size, 1, batch_count); // device arrays device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_alpha_sets_nan, true); hipblas_init_vector(hB_host, arg, hipblas_client_alpha_sets_nan, false, true); hB_device.copy_from(hB_host); hB_gold.copy_from(hB_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasTrmmBatchedFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, batch_count)); CHECK_HIP_ERROR(hB_host.transfer_from(dB)); CHECK_HIP_ERROR(dB.transfer_from(hB_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasTrmmBatchedFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, batch_count)); CHECK_HIP_ERROR(hB_device.transfer_from(dB)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trmm(side, uplo, transA, diag, M, N, h_alpha, hA[b], lda, hB_gold[b], ldb); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldb, hB_gold, hB_host); unit_check_general(M, N, batch_count, ldb, hB_gold, hB_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldb, hB_gold, hB_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldb, hB_gold, hB_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrmmBatchedFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrmmBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trmm_gflop_count(M, N, K), trmm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } 
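// Illustrative sketch (not part of the original file; assumes an Arguments
// instance can be populated by hand outside the gtest/YAML harness, with alpha
// and iteration counts left at their defaults): the fields below are exactly the
// ones testing_trmm_batched() reads above.
//
//     Arguments arg;
//     arg.M = arg.N = 128;
//     arg.lda = arg.ldb = 128;
//     arg.side = 'L';  arg.uplo = 'U';  arg.transA = 'N';  arg.diag = 'N';
//     arg.batch_count = 5;
//     arg.unit_check  = 1;     // compare against the cblas_trmm reference path
//     hipblasStatus_t st = testing_trmm_batched<float>(arg);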
hipBLAS-rocm-5.5.1/clients/include/testing_trmm_strided_batched.hpp000066400000000000000000000250221434647641600254230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrmmStridedBatchedModel = ArgumentModel; inline void testname_trmm_strided_batched(const Arguments& arg, std::string& name) { hipblasTrmmStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trmm_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrmmStridedBatchedFn = FORTRAN ? hipblasTrmmStridedBatched : hipblasTrmmStridedBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); hipblasStatus_t status = HIPBLAS_STATUS_SUCCESS; int K = (side == HIPBLAS_SIDE_LEFT ? M : N); hipblasStride stride_A = size_t(lda) * K * stride_scale; hipblasStride stride_B = size_t(ldb) * N * stride_scale; size_t A_size = stride_A * batch_count; size_t B_size = stride_B * batch_count; // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory if(M < 0 || N < 0 || lda < K || ldb < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB_host(B_size); host_vector hB_device(B_size); host_vector hB_gold(B_size); device_vector dA(A_size); device_vector dB(B_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial Data on CPU hipblas_init_matrix( hA, arg, K, K, lda, stride_A, batch_count, hipblas_client_alpha_sets_nan, true); hipblas_init_matrix( hB_host, arg, M, N, ldb, stride_B, batch_count, hipblas_client_alpha_sets_nan, false, true); hB_device = hB_host; hB_gold = hB_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_host, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasTrmmStridedBatchedFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA, lda, stride_A, dB, ldb, stride_B, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hB_host, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_device, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasTrmmStridedBatchedFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hB_device, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trmm(side, uplo, transA, diag, M, N, h_alpha, hA.data() + b * stride_A, lda, hB_gold.data() + b * stride_B, ldb); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(M, N, batch_count, ldb, stride_B, hB_gold, hB_host); unit_check_general(M, N, batch_count, ldb, stride_B, hB_gold, hB_device); } if(arg.norm_check) { hipblas_error_host = norm_check_general('F', M, N, ldb, stride_B, hB_gold, hB_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldb, stride_B, hB_gold, hB_device, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrmmStridedBatchedFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, stride_A, dB, ldb, stride_B, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrmmStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trmm_gflop_count(M, N, K), trmm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trmv.hpp000066400000000000000000000131331434647641600222440ustar00rootroot00000000000000/* 
************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrmvModel = ArgumentModel; inline void testname_trmv(const Arguments& arg, std::string& name) { hipblasTrmvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trmv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrmvFn = FORTRAN ? hipblasTrmv : hipblasTrmv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int lda = arg.lda; int incx = arg.incx; int abs_incx = incx >= 0 ? incx : -incx; size_t x_size = size_t(M) * abs_incx; size_t A_size = size_t(lda) * M; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx; if(invalid_size || !M) { hipblasStatus_t actual = hipblasTrmvFn(handle, uplo, transA, diag, M, nullptr, lda, nullptr, incx); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(x_size); host_vector hres(x_size); device_vector dA(A_size); device_vector dx(x_size); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix(hA, arg, M, M, lda, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_never_set_nan, false, true); // copy vector is easy in STL; hz = hy: save a copy in hz which will be output of CPU BLAS hres = hx; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * x_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTrmvFn(handle, uplo, transA, diag, M, dA, lda, dx, incx)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hres.data(), dx, sizeof(T) * x_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_trmv(uplo, transA, diag, M, hA.data(), lda, hx.data(), incx); // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, abs_incx, hx, hres); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, abs_incx, hx.data(), hres.data()); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrmvFn(handle, uplo, transA, diag, M, dA, lda, dx, incx)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrmvModel{}.log_args(std::cout, arg, gpu_time_used, trmv_gflop_count(M), trmv_gbyte_count(M), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trmv_batched.hpp000066400000000000000000000152171434647641600237230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrmvBatchedModel = ArgumentModel; inline void testname_trmv_batched(const Arguments& arg, std::string& name) { hipblasTrmvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trmv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrmvBatchedFn = FORTRAN ? hipblasTrmvBatched : hipblasTrmvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int lda = arg.lda; int incx = arg.incx; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; size_t A_size = size_t(lda) * M; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTrmvBatchedFn( handle, uplo, transA, diag, M, nullptr, lda, nullptr, incx, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } double gpu_time_used, hipblas_error; // arrays of pointers-to-host on host host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hres(M, incx, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dx(M, incx, batch_count); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx.memcheck()); hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_never_set_nan, false, true); hres.copy_from(hx); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dx.transfer_from(hx)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTrmvBatchedFn(handle, uplo, transA, diag, M, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, batch_count)); CHECK_HIP_ERROR(hres.transfer_from(dx)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trmv(uplo, transA, diag, M, hA[b], lda, hx[b], incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incx, hx, hres); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, abs_incx, hx, hres, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrmvBatchedFn(handle, uplo, transA, diag, M, dA.ptr_on_device(), lda, dx.ptr_on_device(), incx, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrmvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trmv_gflop_count(M), trmv_gbyte_count(M), 
hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trmv_strided_batched.hpp000066400000000000000000000162461434647641600254440ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrmvStridedBatchedModel = ArgumentModel; inline void testname_trmv_strided_batched(const Arguments& arg, std::string& name) { hipblasTrmvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trmv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrmvStridedBatchedFn = FORTRAN ? hipblasTrmvStridedBatched : hipblasTrmvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int lda = arg.lda; int incx = arg.incx; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx >= 0 ? incx : -incx; hipblasStride stride_A = size_t(lda) * M * stride_scale; hipblasStride stride_x = size_t(M) * abs_incx * stride_scale; size_t A_size = stride_A * batch_count; size_t X_size = stride_x * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTrmvStridedBatchedFn(handle, uplo, transA, diag, M, nullptr, lda, stride_A, nullptr, incx, stride_x, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hx(X_size); host_vector hres(X_size); device_vector dA(A_size); device_vector dx(X_size); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix( hA, arg, M, M, lda, stride_A, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, M, abs_incx, stride_x, batch_count, hipblas_client_never_set_nan, false, true); hres = hx; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dx, hx.data(), sizeof(T) * X_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTrmvStridedBatchedFn( handle, uplo, transA, diag, M, dA, lda, stride_A, dx, incx, stride_x, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hres.data(), dx, sizeof(T) * X_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trmv(uplo, transA, diag, M, hA.data() + b * stride_A, lda, hx.data() + b * stride_x, incx); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { unit_check_general(1, M, batch_count, abs_incx, stride_x, hx, hres); } if(arg.norm_check) { hipblas_error = norm_check_general('F', 1, M, abs_incx, stride_x, hx, hres, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrmvStridedBatchedFn( handle, uplo, transA, diag, M, dA, lda, stride_A, dx, incx, stride_x, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrmvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trmv_gflop_count(M), trmv_gbyte_count(M), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trsm.hpp000066400000000000000000000164171434647641600222510ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrsmModel = ArgumentModel; inline void testname_trsm(const Arguments& arg, std::string& name) { hipblasTrsmModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsm(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsmFn = FORTRAN ? hipblasTrsm : hipblasTrsm; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; T h_alpha = arg.get_alpha(); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || lda < K || ldb < M) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB_host(B_size); host_vector hB_device(B_size); host_vector hB_gold(B_size); device_vector dA(A_size); device_vector dB(B_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init_matrix(hA, arg, K, K, lda, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_matrix(hB_host, arg, M, N, ldb, 0, 1, hipblas_client_never_set_nan); // pad untouched area into zero for(int i = K; i < lda; i++) { for(int j = 0; j < K; j++) { hA[i + j * lda] = 0.0; } } // proprocess the matrix to avoid ill-conditioned matrix std::vector ipiv(K); cblas_getrf(K, K, hA.data(), lda, ipiv.data()); for(int i = 0; i < K; i++) { for(int j = i; j < K; j++) { hA[i + j * lda] = hA[j + i * lda]; if(diag == HIPBLAS_DIAG_UNIT) { if(i == j) hA[i + j * lda] = 1.0; } } } // pad untouched area into zero for(int i = M; i < ldb; i++) { for(int j = 0; j < N; j++) { hB_host[i + j * ldb] = 0.0; } } hB_gold = hB_host; // original solution hX // Calculate hB = hA*hX; cblas_trmm( side, uplo, transA, diag, M, N, T(1.0) / h_alpha, (const T*)hA, lda, hB_host, ldb); hB_device = hB_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_host, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR( hipblasTrsmFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA, lda, dB, ldb)); CHECK_HIP_ERROR(hipMemcpy(hB_host, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_device, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, 
HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR( hipblasTrsmFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, dB, ldb)); CHECK_HIP_ERROR(hipMemcpy(hB_device, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ // cblas_trsm( // side, uplo, transA, diag, M, N, h_alpha, (const T*)hA, lda, hB_gold, ldb); // if enable norm check, norm check is invasive real_t eps = std::numeric_limits>::epsilon(); double tolerance = eps * 40 * M; hipblas_error_host = norm_check_general('F', M, N, ldb, hB_gold, hB_host); hipblas_error_device = norm_check_general('F', M, N, ldb, hB_gold, hB_device); if(arg.unit_check) { unit_check_error(hipblas_error_host, tolerance); unit_check_error(hipblas_error_device, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasTrsmFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, dB, ldb)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrsmModel{}.log_args(std::cout, arg, gpu_time_used, trsm_gflop_count(M, N, K), trsm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trsm_batched.hpp000066400000000000000000000247441434647641600237250ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrsmBatchedModel = ArgumentModel; inline void testname_trsm_batched(const Arguments& arg, std::string& name) { hipblasTrsmBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsm_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsmBatchedFn = FORTRAN ? 
hipblasTrsmBatched : hipblasTrsmBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; // check here to prevent undefined memory allocation error // TODO: Workaround for cuda tests, not actually testing return values if(M < 0 || N < 0 || lda < K || ldb < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(!M || !N || !lda || !ldb || !batch_count) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB_host(B_size, 1, batch_count); host_batch_vector hB_device(B_size, 1, batch_count); host_batch_vector hB_gold(B_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hB_host, arg, hipblas_client_never_set_nan); for(int b = 0; b < batch_count; b++) { // pad untouched area into zero for(int i = K; i < lda; i++) { for(int j = 0; j < K; j++) { hA[b][i + j * lda] = 0.0; } } // proprocess the matrix to avoid ill-conditioned matrix std::vector ipiv(K); cblas_getrf(K, K, hA[b], lda, ipiv.data()); for(int i = 0; i < K; i++) { for(int j = i; j < K; j++) { hA[b][i + j * lda] = hA[b][j + i * lda]; if(diag == HIPBLAS_DIAG_UNIT) { if(i == j) hA[b][i + j * lda] = 1.0; } } } // pad untouched area into zero for(int i = M; i < ldb; i++) { for(int j = 0; j < N; j++) { hB_host[b][i + j * ldb] = 0.0; } } // hB_gold[b] = hB_host[b]; // original solution hX // Calculate hB = hA*hX; cblas_trmm(side, uplo, transA, diag, M, N, T(1.0) / h_alpha, (const T*)hA[b], lda, hB_host[b], ldb); } hB_gold.copy_from(hB_host); hB_device.copy_from(hB_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasTrsmBatchedFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, batch_count)); CHECK_HIP_ERROR(hB_host.transfer_from(dB)); CHECK_HIP_ERROR(dB.transfer_from(hB_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasTrsmBatchedFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, batch_count)); CHECK_HIP_ERROR(hB_device.transfer_from(dB)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trsm( side, uplo, transA, diag, M, N, 
h_alpha, (const T*)hA[b], lda, hB_gold[b], ldb); } // if enable norm check, norm check is invasive real_t eps = std::numeric_limits>::epsilon(); double tolerance = eps * 40 * M; hipblas_error_host = norm_check_general('F', M, N, ldb, hB_gold, hB_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldb, hB_gold, hB_device, batch_count); if(arg.unit_check) { unit_check_error(hipblas_error_host, tolerance); unit_check_error(hipblas_error_device, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasTrsmBatchedFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrsmBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trsm_gflop_count(M, N, K), trsm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trsm_batched_ex.hpp000066400000000000000000000323221434647641600244100ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" #define TRSM_BLOCK 128 /* ============================================================================================ */ using hipblasTrsmBatchedExModel = ArgumentModel; inline void testname_trsm_batched_ex(const Arguments& arg, std::string& name) { hipblasTrsmBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsm_batched_ex(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsmBatchedExFn = FORTRAN ? 
hipblasTrsmBatchedExFortran : hipblasTrsmBatchedEx; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; // check here to prevent undefined memory allocation error // TODO: Workaround for cuda tests, not actually testing return values if(M < 0 || N < 0 || lda < K || ldb < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(!M || !N || !lda || !ldb || !batch_count) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB_host(B_size, 1, batch_count); host_batch_vector hB_device(B_size, 1, batch_count); host_batch_vector hB_cpu(B_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dB(B_size, 1, batch_count); device_batch_vector dinvA(TRSM_BLOCK * K, 1, batch_count); device_vector d_alpha(1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dinvA.memcheck()); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hB_host, arg, hipblas_client_never_set_nan); for(int b = 0; b < batch_count; b++) { // pad untouched area into zero for(int i = K; i < lda; i++) { for(int j = 0; j < K; j++) { hA[b][i + j * lda] = 0.0; } } // proprocess the matrix to avoid ill-conditioned matrix host_vector ipiv(K); cblas_getrf(K, K, hA[b], lda, ipiv); for(int i = 0; i < K; i++) { for(int j = i; j < K; j++) { hA[b][i + j * lda] = hA[b][j + i * lda]; if(diag == HIPBLAS_DIAG_UNIT) { if(i == j) hA[b][i + j * lda] = 1.0; } } } // pad untouched area into zero for(int i = M; i < ldb; i++) { for(int j = 0; j < N; j++) { hB_host[b][i + j * ldb] = 0.0; } } // Calculate hB = hA*hX; cblas_trmm(side, uplo, transA, diag, M, N, T(1.0) / h_alpha, (const T*)hA[b], lda, hB_host[b], ldb); } hB_device.copy_from(hB_host); hB_cpu.copy_from(hB_host); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB_host)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); // calculate invA hipblasStride stride_A = TRSM_BLOCK * lda + TRSM_BLOCK; hipblasStride stride_invA = TRSM_BLOCK * TRSM_BLOCK; int blocks = K / TRSM_BLOCK; for(int b = 0; b < batch_count; b++) { if(blocks > 0) { CHECK_HIPBLAS_ERROR(hipblasTrtriStridedBatched(handle, uplo, diag, TRSM_BLOCK, dA[b], lda, stride_A, dinvA[b], TRSM_BLOCK, stride_invA, blocks)); } if(K % TRSM_BLOCK != 0 || blocks == 0) { CHECK_HIPBLAS_ERROR(hipblasTrtriStridedBatched(handle, uplo, diag, K - TRSM_BLOCK * blocks, dA[b] + stride_A * blocks, lda, stride_A, dinvA[b] + stride_invA * blocks, TRSM_BLOCK, stride_invA, 1)); } } if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasTrsmBatchedExFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA.ptr_on_device(), lda, 
dB.ptr_on_device(), ldb, batch_count, dinvA.ptr_on_device(), TRSM_BLOCK * K, arg.compute_type)); CHECK_HIP_ERROR(hB_host.transfer_from(dB)); CHECK_HIP_ERROR(dB.transfer_from(hB_device)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasTrsmBatchedExFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, batch_count, dinvA.ptr_on_device(), TRSM_BLOCK * K, arg.compute_type)); CHECK_HIP_ERROR(hB_device.transfer_from(dB)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trsm( side, uplo, transA, diag, M, N, h_alpha, (const T*)hA[b], lda, hB_cpu[b], ldb); } // if enable norm check, norm check is invasive real_t eps = std::numeric_limits>::epsilon(); double tolerance = eps * 40 * M; hipblas_error_host = norm_check_general('F', M, N, ldb, hB_cpu, hB_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldb, hB_cpu, hB_device, batch_count); if(arg.unit_check) { unit_check_error(hipblas_error_host, tolerance); unit_check_error(hipblas_error_device, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasTrsmBatchedExFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA.ptr_on_device(), lda, dB.ptr_on_device(), ldb, batch_count, dinvA.ptr_on_device(), TRSM_BLOCK * K, arg.compute_type)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrsmBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, trsm_gflop_count(M, N, K), trsm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trsm_ex.hpp000066400000000000000000000263771434647641600227530ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" #define TRSM_BLOCK 128 /* ============================================================================================ */ using hipblasTrsmExModel = ArgumentModel; inline void testname_trsm_ex(const Arguments& arg, std::string& name) { hipblasTrsmExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsm_ex(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsmExFn = FORTRAN ? hipblasTrsmExFortran : hipblasTrsmEx; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; T h_alpha = arg.get_alpha(); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); size_t A_size = size_t(lda) * K; size_t B_size = size_t(ldb) * N; // check here to prevent undefined memory allocation error if(M < 0 || N < 0 || lda < K || ldb < M) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB_host(B_size); host_vector hB_device(B_size); host_vector hB_cpu(B_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dinvA(TRSM_BLOCK * K); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init_matrix(hA, arg, K, K, lda, 0, 1, hipblas_client_never_set_nan, true); hipblas_init_matrix(hB_host, arg, M, N, ldb, 0, 1, hipblas_client_never_set_nan); // pad untouched area into zero for(int i = K; i < lda; i++) { for(int j = 0; j < K; j++) { hA[i + j * lda] = 0.0; } } // proprocess the matrix to avoid ill-conditioned matrix host_vector ipiv(K); cblas_getrf(K, K, hA.data(), lda, ipiv.data()); for(int i = 0; i < K; i++) { for(int j = i; j < K; j++) { hA[i + j * lda] = hA[j + i * lda]; if(diag == HIPBLAS_DIAG_UNIT) { if(i == j) hA[i + j * lda] = 1.0; } } } // pad untouched area into zero for(int i = M; i < ldb; i++) { for(int j = 0; j < N; j++) { hB_host[i + j * ldb] = 0.0; } } // Calculate hB = hA*hX; cblas_trmm(side, uplo, transA, diag, M, N, T(1.0) / h_alpha, (const T*)hA.data(), lda, hB_host.data(), ldb); hB_cpu = hB_device = hB_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_host, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); hipblasStride stride_A = TRSM_BLOCK * size_t(lda) + TRSM_BLOCK; hipblasStride stride_invA = TRSM_BLOCK * TRSM_BLOCK; int blocks = K / TRSM_BLOCK; // Calculate invA if(blocks > 0) { CHECK_HIPBLAS_ERROR(hipblasTrtriStridedBatched(handle, uplo, diag, TRSM_BLOCK, dA, lda, stride_A, dinvA, TRSM_BLOCK, stride_invA, blocks)); } if(K % TRSM_BLOCK != 0 || blocks == 0) { CHECK_HIPBLAS_ERROR(hipblasTrtriStridedBatched(handle, uplo, diag, K - TRSM_BLOCK * blocks, dA + stride_A * blocks, lda, stride_A, dinvA + stride_invA * blocks, TRSM_BLOCK, stride_invA, 1)); } if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); 
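// The next two solves exercise both hipBLAS pointer modes: first alpha is
// read from host memory (&h_alpha); then, after dB is restored from
// hB_device, the same solve is repeated with alpha resident on the device
// (d_alpha).  Both results are checked against the cblas_trsm reference below.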
CHECK_HIPBLAS_ERROR(hipblasTrsmExFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA, lda, dB, ldb, dinvA, TRSM_BLOCK * K, arg.compute_type)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hB_host, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_device, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasTrsmExFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, dB, ldb, dinvA, TRSM_BLOCK * K, arg.compute_type)); CHECK_HIP_ERROR(hipMemcpy(hB_device, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_trsm( side, uplo, transA, diag, M, N, h_alpha, (const T*)hA.data(), lda, hB_cpu.data(), ldb); // if enable norm check, norm check is invasive real_t eps = std::numeric_limits>::epsilon(); double tolerance = eps * 40 * M; hipblas_error_host = norm_check_general('F', M, N, ldb, hB_cpu, hB_host); hipblas_error_device = norm_check_general('F', M, N, ldb, hB_cpu, hB_device); if(arg.unit_check) { unit_check_error(hipblas_error_host, tolerance); unit_check_error(hipblas_error_device, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrsmExFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, dB, ldb, dinvA, TRSM_BLOCK * K, arg.compute_type)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrsmExModel{}.log_args(std::cout, arg, gpu_time_used, trsm_gflop_count(M, N, K), trsm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trsm_strided_batched.hpp000066400000000000000000000273071434647641600254410ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrsmStridedBatchedModel = ArgumentModel; inline void testname_trsm_strided_batched(const Arguments& arg, std::string& name) { hipblasTrsmStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsm_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsmStridedBatchedFn = FORTRAN ? hipblasTrsmStridedBatched : hipblasTrsmStridedBatched; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); hipblasStride strideA = size_t(lda) * K * stride_scale; hipblasStride strideB = size_t(ldb) * N * stride_scale; size_t A_size = strideA * batch_count; size_t B_size = strideB * batch_count; // check here to prevent undefined memory allocation error // TODO: Workaround for cuda tests, not actually testing return values if(M < 0 || N < 0 || lda < K || ldb < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(!batch_count) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB_host(B_size); host_vector hB_device(B_size); host_vector hB_gold(B_size); device_vector dA(A_size); device_vector dB(B_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init_matrix( hA, arg, K, K, lda, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_matrix( hB_host, arg, M, N, ldb, strideB, batch_count, hipblas_client_never_set_nan); for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; T* hBb = hB_host.data() + b * strideB; // pad ountouched area into zero for(int i = K; i < lda; i++) { for(int j = 0; j < K; j++) { hAb[i + j * lda] = 0.0; } } // proprocess the matrix to avoid ill-conditioned matrix std::vector ipiv(K); cblas_getrf(K, K, hAb, lda, ipiv.data()); for(int i = 0; i < K; i++) { for(int j = i; j < K; j++) { hAb[i + j * lda] = hAb[j + i * lda]; if(diag == HIPBLAS_DIAG_UNIT) { if(i == j) hAb[i + j * lda] = 1.0; } } } // pad untouched area into zero for(int i = M; i < ldb; i++) { for(int j = 0; j < N; j++) { hBb[i + j * ldb] = 0.0; } } // Calculate hB = hA*hX; cblas_trmm( side, uplo, transA, diag, M, N, T(1.0) / h_alpha, (const T*)hAb, lda, hBb, ldb); } hB_gold = hB_host; // original solutions hX hB_device = hB_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_host, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); 
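// Both pointer modes are exercised below (host-side &h_alpha, then device-side
// d_alpha after dB is restored from hB_device).  In the strided-batched layout,
// batch index b starts at offset b * strideA in dA and b * strideB in dB, which
// is how the cblas_trsm reference loop below addresses each matrix.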
CHECK_HIPBLAS_ERROR(hipblasTrsmStridedBatchedFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA, lda, strideA, dB, ldb, strideB, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hB_host, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_device, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasTrsmStridedBatchedFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, strideA, dB, ldb, strideB, batch_count)); CHECK_HIP_ERROR(hipMemcpy(hB_device, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trsm(side, uplo, transA, diag, M, N, h_alpha, (const T*)hA.data() + b * strideA, lda, hB_gold.data() + b * strideB, ldb); } // if enable norm check, norm check is invasive real_t eps = std::numeric_limits>::epsilon(); double tolerance = eps * 40 * M; hipblas_error_host = norm_check_general('F', M, N, ldb, strideB, hB_gold, hB_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldb, strideB, hB_gold, hB_device, batch_count); if(arg.unit_check) { unit_check_error(hipblas_error_host, tolerance); unit_check_error(hipblas_error_device, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasTrsmStridedBatchedFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, strideA, dB, ldb, strideB, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrsmStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trsm_gflop_count(M, N, K), trsm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trsm_strided_batched_ex.hpp000066400000000000000000000352261434647641600261340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" #define TRSM_BLOCK 128 /* ============================================================================================ */ using hipblasTrsmStridedBatchedExModel = ArgumentModel; inline void testname_trsm_strided_batched_ex(const Arguments& arg, std::string& name) { hipblasTrsmStridedBatchedExModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsm_strided_batched_ex(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsmStridedBatchedExFn = FORTRAN ? hipblasTrsmStridedBatchedEx : hipblasTrsmStridedBatchedEx; hipblasSideMode_t side = char2hipblas_side(arg.side); hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasOperation_t transA = char2hipblas_operation(arg.transA); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int M = arg.M; int N = arg.N; int lda = arg.lda; int ldb = arg.ldb; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; T h_alpha = arg.get_alpha(); int K = (side == HIPBLAS_SIDE_LEFT ? M : N); hipblasStride strideA = size_t(lda) * K * stride_scale; hipblasStride strideB = size_t(ldb) * N * stride_scale; hipblasStride stride_invA = TRSM_BLOCK * size_t(K); size_t A_size = strideA * batch_count; size_t B_size = strideB * batch_count; size_t invA_size = stride_invA * batch_count; // check here to prevent undefined memory allocation error // TODO: Workaround for cuda tests, not actually testing return values if(M < 0 || N < 0 || lda < K || ldb < M || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } if(!batch_count) { return HIPBLAS_STATUS_SUCCESS; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_vector hA(A_size); host_vector hB_host(B_size); host_vector hB_device(B_size); host_vector hB_cpu(B_size); device_vector dA(A_size); device_vector dB(B_size); device_vector dinvA(invA_size); device_vector d_alpha(1); double gpu_time_used, hipblas_error_host, hipblas_error_device; hipblasLocalHandle handle(arg); // Initial hA on CPU hipblas_init_matrix( hA, arg, K, K, lda, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_matrix( hB_host, arg, M, N, ldb, strideB, batch_count, hipblas_client_never_set_nan); for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; T* hBb = hB_host.data() + b * strideB; // pad ountouched area into zero for(int i = K; i < lda; i++) { for(int j = 0; j < K; j++) { hAb[i + j * lda] = 0.0; } } // proprocess the matrix to avoid ill-conditioned matrix host_vector ipiv(K); cblas_getrf(K, K, hAb, lda, ipiv.data()); for(int i = 0; i < K; i++) { for(int j = i; j < K; j++) { hAb[i + j * lda] = hAb[j + i * lda]; if(diag == HIPBLAS_DIAG_UNIT) { if(i == j) hAb[i + j * lda] = 1.0; } } } // pad untouched area into zero for(int i = M; i < ldb; i++) { for(int j = 0; j < N; j++) { hBb[i + j * ldb] = 0.0; } } // Calculate hB = hA*hX; cblas_trmm( side, uplo, transA, diag, M, N, T(1.0) / h_alpha, (const T*)hAb, lda, hBb, ldb); } hB_device = hB_cpu = hB_host; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_host, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(d_alpha, &h_alpha, sizeof(T), hipMemcpyHostToDevice)); // calculate invA int sub_stride_A = TRSM_BLOCK * lda + TRSM_BLOCK; int sub_stride_invA = TRSM_BLOCK * TRSM_BLOCK; int blocks = K / TRSM_BLOCK; for(int b = 0; b < 
batch_count; b++) { if(blocks > 0) { CHECK_HIPBLAS_ERROR(hipblasTrtriStridedBatched(handle, uplo, diag, TRSM_BLOCK, dA + b * strideA, lda, sub_stride_A, dinvA + b * stride_invA, TRSM_BLOCK, sub_stride_invA, blocks)); } if(K % TRSM_BLOCK != 0 || blocks == 0) { CHECK_HIPBLAS_ERROR( hipblasTrtriStridedBatched(handle, uplo, diag, K - TRSM_BLOCK * blocks, dA + sub_stride_A * blocks + b * strideA, lda, sub_stride_A, dinvA + sub_stride_invA * blocks + b * stride_invA, TRSM_BLOCK, sub_stride_invA, 1)); } } if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasTrsmStridedBatchedExFn(handle, side, uplo, transA, diag, M, N, &h_alpha, dA, lda, strideA, dB, ldb, strideB, batch_count, dinvA, invA_size, stride_invA, arg.compute_type)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hB_host, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); CHECK_HIP_ERROR(hipMemcpy(dB, hB_device, sizeof(T) * B_size, hipMemcpyHostToDevice)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); CHECK_HIPBLAS_ERROR(hipblasTrsmStridedBatchedExFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, strideA, dB, ldb, strideB, batch_count, dinvA, invA_size, stride_invA, arg.compute_type)); CHECK_HIP_ERROR(hipMemcpy(hB_device, dB, sizeof(T) * B_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trsm(side, uplo, transA, diag, M, N, h_alpha, (const T*)hA.data() + b * strideA, lda, hB_cpu.data() + b * strideB, ldb); } // if enable norm check, norm check is invasive real_t eps = std::numeric_limits>::epsilon(); double tolerance = eps * 40 * M; hipblas_error_host = norm_check_general('F', M, N, ldb, strideB, hB_cpu, hB_host, batch_count); hipblas_error_device = norm_check_general('F', M, N, ldb, strideB, hB_cpu, hB_device, batch_count); if(arg.unit_check) { unit_check_error(hipblas_error_host, tolerance); unit_check_error(hipblas_error_device, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) { gpu_time_used = get_time_us_sync(stream); } CHECK_HIPBLAS_ERROR(hipblasTrsmStridedBatchedExFn(handle, side, uplo, transA, diag, M, N, d_alpha, dA, lda, strideA, dB, ldb, strideB, batch_count, dinvA, invA_size, stride_invA, arg.compute_type)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrsmStridedBatchedExModel{}.log_args(std::cout, arg, gpu_time_used, trsm_gflop_count(M, N, K), trsm_gbyte_count(M, N, K), hipblas_error_host, hipblas_error_device); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trsv.hpp000066400000000000000000000157311434647641600222600ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrsvModel = ArgumentModel; inline void testname_trsv(const Arguments& arg, std::string& name) { hipblasTrsvModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsv(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsvFn = FORTRAN ? hipblasTrsv : hipblasTrsv; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int M = arg.M; int incx = arg.incx; int lda = arg.lda; int abs_incx = incx < 0 ? -incx : incx; size_t size_A = size_t(lda) * M; size_t size_x = abs_incx * size_t(M); hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx; if(invalid_size || !M) { hipblasStatus_t actual = hipblasTrsvFn(handle, uplo, transA, diag, M, nullptr, lda, nullptr, incx); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(size_A); host_vector AAT(size_A); host_vector hb(size_x); host_vector hx(size_x); host_vector hx_or_b_1(size_x); device_vector dA(size_A); device_vector dx_or_b(size_x); double gpu_time_used, hipblas_error; // Initial Data on CPU hipblas_init_matrix(hA, arg, M, M, lda, 0, 1, hipblas_client_never_set_nan, true, false); hipblas_init_vector(hx, arg, M, abs_incx, 0, 1, hipblas_client_never_set_nan, false, true); hb = hx; // calculate AAT = hA * hA ^ T cblas_gemm(HIPBLAS_OP_N, HIPBLAS_OP_T, M, M, M, (T)1.0, hA.data(), lda, hA.data(), lda, (T)0.0, AAT.data(), lda); // copy AAT into hA, make hA strictly diagonal dominant, and therefore SPD for(int i = 0; i < M; i++) { T t = 0.0; for(int j = 0; j < M; j++) { hA[i + j * lda] = AAT[i + j * lda]; t += std::abs(AAT[i + j * lda]); } hA[i + i * lda] = t; } // calculate Cholesky factorization of SPD matrix hA cblas_potrf(arg.uplo, M, hA.data(), lda); // make hA unit diagonal if diag == rocblas_diagonal_unit if(arg.diag == 'U' || arg.diag == 'u') { if('L' == arg.uplo || 'l' == arg.uplo) for(int i = 0; i < M; i++) { T diag = hA[i + i * lda]; for(int j = 0; j <= i; j++) hA[i + j * lda] = hA[i + j * lda] / diag; } else for(int j = 0; j < M; j++) { T diag = hA[j + j * lda]; for(int i = 0; i <= j; i++) hA[i + j * lda] = hA[i + j * lda] / diag; } } // Calculate hb = hA*hx; cblas_trmv(uplo, transA, diag, M, hA.data(), lda, hb.data(), incx); hx_or_b_1 = hb; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * size_A, hipMemcpyHostToDevice)); CHECK_HIP_ERROR( hipMemcpy(dx_or_b, hx_or_b_1.data(), sizeof(T) * size_x, hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasTrsvFn(handle, uplo, transA, diag, M, dA, lda, dx_or_b, incx)); // copy output from device to CPU CHECK_HIP_ERROR( hipMemcpy(hx_or_b_1.data(), dx_or_b, sizeof(T) * size_x, hipMemcpyDeviceToHost)); // Calculating error hipblas_error = std::abs(vector_norm_1(M, abs_incx, hx.data(), hx_or_b_1.data())); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * M; unit_check_error(hipblas_error, tolerance); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR( hipblasTrsvFn(handle, uplo, transA, diag, M, dA, lda, dx_or_b, incx)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTrsvModel{}.log_args(std::cout, arg, gpu_time_used, trsv_gflop_count(M), trsv_gbyte_count(M), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trsv_batched.hpp000066400000000000000000000212651434647641600237310ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrsvBatchedModel = ArgumentModel; inline void testname_trsv_batched(const Arguments& arg, std::string& name) { hipblasTrsvBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsv_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsvBatchedFn = FORTRAN ? hipblasTrsvBatched : hipblasTrsvBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int M = arg.M; int incx = arg.incx; int lda = arg.lda; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; size_t size_A = size_t(lda) * M; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTrsvBatchedFn( handle, uplo, transA, diag, M, nullptr, lda, nullptr, incx, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_batch_vector hA(size_A, 1, batch_count); host_batch_vector AAT(size_A, 1, batch_count); host_batch_vector hb(M, incx, batch_count); host_batch_vector hx(M, incx, batch_count); host_batch_vector hx_or_b_1(M, incx, batch_count); device_batch_vector dA(size_A, 1, batch_count); device_batch_vector dx_or_b(M, incx, batch_count); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dx_or_b.memcheck()); double gpu_time_used, hipblas_error, cumulative_hipblas_error; // Initial Data on CPU hipblas_init_vector(hA, arg, hipblas_client_never_set_nan, true); hipblas_init_vector(hx, arg, hipblas_client_never_set_nan, false, true); hb.copy_from(hx); for(int b = 0; b < batch_count; b++) { // calculate AAT = hA * hA ^ T cblas_gemm(HIPBLAS_OP_N, HIPBLAS_OP_T, M, M, M, (T)1.0, (T*)hA[b], lda, (T*)hA[b], lda, (T)0.0, (T*)AAT[b], lda); // copy AAT into hA, make hA strictly diagonal dominant, and therefore SPD for(int i = 0; i < M; i++) { T t = 0.0; for(int j = 0; j < M; j++) { hA[b][i + j * lda] = AAT[b][i + j * lda]; t += std::abs(AAT[b][i + j * lda]); } hA[b][i + i * lda] = t; } // calculate Cholesky factorization of SPD matrix hA cblas_potrf(arg.uplo, M, hA[b], lda); // make hA unit diagonal if diag == rocblas_diagonal_unit if(arg.diag == 'U' || arg.diag == 'u') { if('L' == arg.uplo || 'l' == arg.uplo) for(int i = 0; i < M; i++) { T diag = hA[b][i + i * lda]; for(int j = 0; j <= i; j++) hA[b][i + j * lda] = hA[b][i + j * lda] / diag; } else for(int j = 0; j < M; j++) { T diag = hA[b][j + j * lda]; for(int i = 0; i <= j; i++) hA[b][i + j * lda] = hA[b][i + j * lda] / diag; } } } for(int b = 0; b < batch_count; b++) { // Calculate hb = hA*hx; cblas_trmv(uplo, transA, diag, M, hA[b], lda, hb[b], incx); } hx_or_b_1.copy_from(hb); CHECK_HIP_ERROR(dx_or_b.transfer_from(hx_or_b_1)); CHECK_HIP_ERROR(dA.transfer_from(hA)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasTrsvBatchedFn(handle, uplo, transA, diag, M, dA.ptr_on_device(), lda, dx_or_b.ptr_on_device(), incx, batch_count)); CHECK_HIP_ERROR(hx_or_b_1.transfer_from(dx_or_b)); // Calculating error // For norm_check/bench, currently taking the cumulative sum of errors over all batches for(int b = 0; b < batch_count; b++) { hipblas_error = std::abs(vector_norm_1(M, abs_incx, hx[b], hx_or_b_1[b])); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * M; unit_check_error(hipblas_error, tolerance); } cumulative_hipblas_error += hipblas_error; } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrsvBatchedFn(handle, uplo, transA, diag, M, dA.ptr_on_device(), lda, dx_or_b.ptr_on_device(), incx, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTrsvBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trsv_gflop_count(M), trsv_gbyte_count(M), cumulative_hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } 
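// ---------------------------------------------------------------------------
// A minimal, host-only sketch of the conditioning recipe the trsv tests above
// rely on: form AAT = A * A^T, overwrite each diagonal entry with its row's
// 1-norm so the matrix is strictly diagonally dominant (hence SPD), take the
// Cholesky factor to obtain a well-conditioned lower-triangular matrix, and
// optionally rescale rows to a unit diagonal (the HIPBLAS_DIAG_UNIT case).
// Assumptions: column-major storage with leading dimension lda; the helper
// names cholesky_lower and make_well_conditioned_lower are hypothetical and
// exist only for this illustration, not in hipBLAS or its test harness.
#include <cmath>
#include <vector>

// In-place unblocked Cholesky (lower): A = L * L^T, L stored in the lower triangle.
inline void cholesky_lower(std::vector<double>& A, int n, int lda)
{
    for(int j = 0; j < n; j++)
    {
        double d = A[j + j * lda];
        for(int k = 0; k < j; k++)
            d -= A[j + k * lda] * A[j + k * lda];
        d              = std::sqrt(d);
        A[j + j * lda] = d;
        for(int i = j + 1; i < n; i++)
        {
            double s = A[i + j * lda];
            for(int k = 0; k < j; k++)
                s -= A[i + k * lda] * A[j + k * lda];
            A[i + j * lda] = s / d;
        }
    }
}

// Turn an arbitrary n x n matrix into a well-conditioned lower-triangular factor.
inline void make_well_conditioned_lower(std::vector<double>& A, int n, int lda, bool unit_diag)
{
    std::vector<double> AAT(A.size(), 0.0);
    for(int i = 0; i < n; i++) // AAT = A * A^T (symmetric by construction)
        for(int j = 0; j < n; j++)
        {
            double s = 0.0;
            for(int k = 0; k < n; k++)
                s += A[i + k * lda] * A[j + k * lda];
            AAT[i + j * lda] = s;
        }
    for(int i = 0; i < n; i++) // copy back, making the matrix strictly diagonally dominant
    {
        double t = 0.0;
        for(int j = 0; j < n; j++)
        {
            A[i + j * lda] = AAT[i + j * lda];
            t += std::fabs(AAT[i + j * lda]);
        }
        A[i + i * lda] = t;
    }
    cholesky_lower(A, n, lda);
    if(unit_diag) // divide row i of L by L(i,i) so the diagonal becomes 1
        for(int i = 0; i < n; i++)
        {
            double d = A[i + i * lda];
            for(int j = 0; j <= i; j++)
                A[i + j * lda] /= d;
        }
}
// ---------------------------------------------------------------------------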
hipBLAS-rocm-5.5.1/clients/include/testing_trsv_strided_batched.hpp000066400000000000000000000225761434647641600254550ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrsvStridedBatchedModel = ArgumentModel; inline void testname_trsv_strided_batched(const Arguments& arg, std::string& name) { hipblasTrsvStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trsv_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrsvStridedBatchedFn = FORTRAN ? hipblasTrsvStridedBatched : hipblasTrsvStridedBatched; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); hipblasOperation_t transA = char2hipblas_operation(arg.transA); int M = arg.M; int incx = arg.incx; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int abs_incx = incx < 0 ? -incx : incx; hipblasStride strideA = lda * M * stride_scale; hipblasStride stridex = abs_incx * M * stride_scale; size_t size_A = size_t(strideA) * batch_count; size_t size_x = size_t(stridex) * batch_count; hipblasLocalHandle handle(arg); // argument sanity check, quick return if input parameters are invalid before allocating invalid // memory bool invalid_size = M < 0 || lda < M || lda < 1 || !incx || batch_count < 0; if(invalid_size || !M || !batch_count) { hipblasStatus_t actual = hipblasTrsvStridedBatchedFn(handle, uplo, transA, diag, M, nullptr, lda, strideA, nullptr, incx, stridex, batch_count); EXPECT_HIPBLAS_STATUS( actual, (invalid_size ? HIPBLAS_STATUS_INVALID_VALUE : HIPBLAS_STATUS_SUCCESS)); return actual; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(size_A); host_vector AAT(size_A); host_vector hb(size_x); host_vector hx(size_x); host_vector hx_or_b_1(size_x); device_vector dA(size_A); device_vector dx_or_b(size_x); double gpu_time_used, hipblas_error, cumulative_hipblas_error; // Initial Data on CPU hipblas_init_matrix( hA, arg, M, M, lda, strideA, batch_count, hipblas_client_never_set_nan, true); hipblas_init_vector( hx, arg, M, abs_incx, stridex, batch_count, hipblas_client_never_set_nan, false, true); hb = hx; for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; T* AATb = AAT.data() + b * strideA; T* hbb = hb.data() + b * stridex; // calculate AAT = hA * hA ^ T cblas_gemm( HIPBLAS_OP_N, HIPBLAS_OP_T, M, M, M, (T)1.0, hAb, lda, hAb, lda, (T)0.0, AATb, lda); // copy AAT into hA, make hA strictly diagonal dominant, and therefore SPD for(int i = 0; i < M; i++) { T t = 0.0; for(int j = 0; j < M; j++) { hAb[i + j * lda] = AATb[i + j * lda]; t += std::abs(AATb[i + j * lda]); } hAb[i + i * lda] = t; } // calculate Cholesky factorization of SPD matrix hA cblas_potrf(arg.uplo, M, hAb, lda); // make hA unit diagonal if diag == rocblas_diagonal_unit if(arg.diag == 'U' || arg.diag == 'u') { if('L' == arg.uplo || 'l' == arg.uplo) for(int i = 0; i < M; i++) { T diag = hAb[i + i * lda]; for(int j = 0; j <= i; j++) hAb[i + j * lda] = hAb[i + j * lda] / diag; } else for(int j = 0; j < M; j++) { T diag = hAb[j + j * lda]; for(int i = 0; i <= j; i++) hAb[i + j * lda] = hA[b + j * lda] / diag; } } // Calculate hb = hA*hx; cblas_trmv(uplo, transA, diag, M, hAb, lda, hbb, incx); } hx_or_b_1 = hb; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * size_A, hipMemcpyHostToDevice)); CHECK_HIP_ERROR( hipMemcpy(dx_or_b, hx_or_b_1.data(), sizeof(T) * size_x, hipMemcpyHostToDevice)); /* ===================================================================== HIPBLAS =================================================================== */ if(arg.unit_check || arg.norm_check) { CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); CHECK_HIPBLAS_ERROR(hipblasTrsvStridedBatchedFn( handle, uplo, transA, diag, M, dA, lda, strideA, dx_or_b, incx, stridex, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR( hipMemcpy(hx_or_b_1.data(), dx_or_b, sizeof(T) * size_x, hipMemcpyDeviceToHost)); // Calculating error // For norm_check/bench, currently taking the cumulative sum of errors over all batches for(int b = 0; b < batch_count; b++) { hipblas_error = std::abs(vector_norm_1( M, abs_incx, hx.data() + b * stridex, hx_or_b_1.data() + b * stridex)); if(arg.unit_check) { double tolerance = std::numeric_limits>::epsilon() * 40 * M; unit_check_error(hipblas_error, tolerance); } cumulative_hipblas_error += hipblas_error; } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); CHECK_HIPBLAS_ERROR(hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_HOST)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrsvStridedBatchedFn(handle, uplo, transA, diag, M, dA, lda, strideA, dx_or_b, incx, stridex, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; // in microseconds hipblasTrsvStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trsv_gflop_count(M), trsv_gbyte_count(M), cumulative_hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } 
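// ---------------------------------------------------------------------------
// Illustrative sketch of the strided-batched addressing used by the tests
// above: all batch_count problems live in one contiguous allocation, and the
// b-th matrix or vector starts at base + b * stride (with strideA >= lda * M
// and stridex >= |incx| * M). The helper names below are hypothetical, for
// this sketch only; the hipBLAS strided-batched APIs take the base pointer
// and stride directly and perform this arithmetic internally.
#include <cstddef>

template <typename T>
inline T* strided_batch_ptr(T* base, std::ptrdiff_t stride, int batch)
{
    return base + stride * batch;
}

// Example: apply a per-problem reference routine f(A_b, x_b) to every batch,
// mirroring the per-batch cblas reference loops in the tests above.
template <typename T, typename F>
inline void for_each_strided_batch(T*             A,
                                   std::ptrdiff_t strideA,
                                   T*             x,
                                   std::ptrdiff_t stridex,
                                   int            batch_count,
                                   F&&            f)
{
    for(int b = 0; b < batch_count; b++)
        f(strided_batch_ptr(A, strideA, b), strided_batch_ptr(x, stridex, b));
}
// ---------------------------------------------------------------------------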
hipBLAS-rocm-5.5.1/clients/include/testing_trtri.hpp000066400000000000000000000125331434647641600224230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrtriModel = ArgumentModel; inline void testname_trtri(const Arguments& arg, std::string& name) { hipblasTrtriModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trtri(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrtriFn = FORTRAN ? hipblasTrtri : hipblasTrtri; const double rel_error = get_epsilon() * 1000; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int N = arg.N; int lda = arg.lda; int ldinvA = lda; size_t A_size = size_t(lda) * N; // check here to prevent undefined memory allocation error if(N < 0 || lda < 0 || lda < N) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(A_size); device_vector dA(A_size); device_vector dinvA(A_size); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); // Initial Data on CPU srand(1); hipblas_init_symmetric(hA, N, lda); // proprocess the matrix to avoid ill-conditioned matrix for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { hA[i + j * lda] *= 0.01; if(j % 2) hA[i + j * lda] *= -1; if(uplo == HIPBLAS_FILL_MODE_LOWER && j > i) hA[i + j * lda] = 0.0f; else if(uplo == HIPBLAS_FILL_MODE_UPPER && j < i) hA[i + j * lda] = 0.0f; if(i == j) { if(diag == HIPBLAS_DIAG_UNIT) hA[i + j * lda] = 1.0; else hA[i + j * lda] *= 100.0; } } } hB = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dinvA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTrtriFn(handle, uplo, diag, N, dA, lda, dinvA, ldinvA)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hA, dinvA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ cblas_trtri(arg.uplo, arg.diag, N, hB, lda); if(arg.unit_check) { near_check_general(N, N, lda, hB.data(), hA.data(), rel_error); } if(arg.norm_check) { hipblas_error = norm_check_general('F', N, N, lda, hB, hA); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrtriFn(handle, uplo, diag, N, dA, lda, dinvA, ldinvA)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrtriModel{}.log_args(std::cout, arg, gpu_time_used, trtri_gflop_count(N), trtri_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/testing_trtri_batched.hpp000066400000000000000000000153131434647641600240740ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrtriBatchedModel = ArgumentModel; inline void testname_trtri_batched(const Arguments& arg, std::string& name) { hipblasTrtriBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trtri_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrtriBatchedFn = FORTRAN ? hipblasTrtriBatched : hipblasTrtriBatched; const double rel_error = get_epsilon() * 1000; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int N = arg.N; int lda = arg.lda; int batch_count = arg.batch_count; int ldinvA = lda; size_t A_size = size_t(lda) * N; // check here to prevent undefined memory allocation error if(N < 0 || lda < 0 || lda < N || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory host_batch_vector hA(A_size, 1, batch_count); host_batch_vector hB(A_size, 1, batch_count); device_batch_vector dA(A_size, 1, batch_count); device_batch_vector dinvA(A_size, 1, batch_count); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dinvA.memcheck()); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); hipblas_init(hA, true); for(int b = 0; b < batch_count; b++) { // proprocess the matrix to avoid ill-conditioned matrix for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { hA[b][i + j * lda] *= 0.01; if(j % 2) hA[b][i + j * lda] *= -1; if(uplo == HIPBLAS_FILL_MODE_LOWER && j > i) hA[b][i + j * lda] = 0.0f; else if(uplo == HIPBLAS_FILL_MODE_UPPER && j < i) hA[b][i + j * lda] = 0.0f; if(i == j) { if(diag == HIPBLAS_DIAG_UNIT) hA[b][i + j * lda] = 1.0; else hA[b][i + j * lda] *= 100.0; } } } } hB.copy_from(hA); CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dinvA.transfer_from(hA)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTrtriBatchedFn(handle, uplo, diag, N, dA.ptr_on_device(), lda, dinvA.ptr_on_device(), ldinvA, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hA.transfer_from(dinvA)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trtri(arg.uplo, arg.diag, N, hB[b], lda); } if(arg.unit_check) { for(int b = 0; b < batch_count; b++) near_check_general(N, N, lda, hB[b], hA[b], rel_error); } if(arg.norm_check) { hipblas_error = norm_check_general('F', N, N, lda, hB, hA, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrtriBatchedFn(handle, uplo, diag, N, dA.ptr_on_device(), lda, dinvA.ptr_on_device(), ldinvA, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrtriBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trtri_gflop_count(N), trtri_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } 
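// ---------------------------------------------------------------------------
// Complementary host-side sanity check for a computed triangular inverse.
// The trtri tests above compare the device result against a cblas_trtri
// reference elementwise with rel_error = epsilon * 1000; the sketch below
// instead measures how far A * invA is from the identity, which is another
// way to validate an inverse. Column-major storage with leading dimension
// lda is assumed; max_deviation_from_identity is a hypothetical helper name,
// not part of hipBLAS or its test harness.
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

inline double max_deviation_from_identity(const std::vector<double>& A,
                                          const std::vector<double>& invA,
                                          int                        n,
                                          int                        lda)
{
    double max_err = 0.0;
    for(int i = 0; i < n; i++)
        for(int j = 0; j < n; j++)
        {
            double s = 0.0;
            for(int k = 0; k < n; k++)
                s += A[i + k * lda] * invA[k + j * lda];
            double expected = (i == j) ? 1.0 : 0.0;
            max_err         = std::max(max_err, std::fabs(s - expected));
        }
    return max_err;
}

// Usage sketch, with a tolerance of the same flavor as the tests above:
//     double rel_error = std::numeric_limits<double>::epsilon() * 1000;
//     bool   ok        = max_deviation_from_identity(hA_in, hInv, N, lda) < rel_error;
// ---------------------------------------------------------------------------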
hipBLAS-rocm-5.5.1/clients/include/testing_trtri_strided_batched.hpp000066400000000000000000000145561434647641600256220ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include #include #include #include #include "testing_common.hpp" /* ============================================================================================ */ using hipblasTrtriStridedBatchedModel = ArgumentModel; inline void testname_trtri_strided_batched(const Arguments& arg, std::string& name) { hipblasTrtriStridedBatchedModel{}.test_name(arg, name); } template inline hipblasStatus_t testing_trtri_strided_batched(const Arguments& arg) { bool FORTRAN = arg.fortran; auto hipblasTrtriStridedBatchedFn = FORTRAN ? hipblasTrtriStridedBatched : hipblasTrtriStridedBatched; const double rel_error = get_epsilon() * 1000; hipblasFillMode_t uplo = char2hipblas_fill(arg.uplo); hipblasDiagType_t diag = char2hipblas_diagonal(arg.diag); int N = arg.N; int lda = arg.lda; double stride_scale = arg.stride_scale; int batch_count = arg.batch_count; int ldinvA = lda; hipblasStride strideA = size_t(lda) * N * stride_scale; size_t A_size = strideA * batch_count; // check here to prevent undefined memory allocation error if(N < 0 || lda < 0 || lda < N || batch_count < 0) { return HIPBLAS_STATUS_INVALID_VALUE; } // Naming: dK is in GPU (device) memory. 
hK is in CPU (host) memory host_vector hA(A_size); host_vector hB(A_size); device_vector dA(A_size); device_vector dinvA(A_size); double gpu_time_used, hipblas_error; hipblasLocalHandle handle(arg); srand(1); hipblas_init_symmetric(hA, N, lda, strideA, batch_count); for(int b = 0; b < batch_count; b++) { T* hAb = hA.data() + b * strideA; // proprocess the matrix to avoid ill-conditioned matrix for(int i = 0; i < N; i++) { for(int j = 0; j < N; j++) { hAb[i + j * lda] *= 0.01; if(j % 2) hAb[i + j * lda] *= -1; if(uplo == HIPBLAS_FILL_MODE_LOWER && j > i) hAb[i + j * lda] = 0.0f; else if(uplo == HIPBLAS_FILL_MODE_UPPER && j < i) hAb[i + j * lda] = 0.0f; if(i == j) { if(diag == HIPBLAS_DIAG_UNIT) hAb[i + j * lda] = 1.0; else hAb[i + j * lda] *= 100.0; } } } } hB = hA; // copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy(dA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); CHECK_HIP_ERROR(hipMemcpy(dinvA, hA, sizeof(T) * A_size, hipMemcpyHostToDevice)); if(arg.unit_check || arg.norm_check) { /* ===================================================================== HIPBLAS =================================================================== */ CHECK_HIPBLAS_ERROR(hipblasTrtriStridedBatchedFn( handle, uplo, diag, N, dA, lda, strideA, dinvA, ldinvA, strideA, batch_count)); // copy output from device to CPU CHECK_HIP_ERROR(hipMemcpy(hA, dinvA, sizeof(T) * A_size, hipMemcpyDeviceToHost)); /* ===================================================================== CPU BLAS =================================================================== */ for(int b = 0; b < batch_count; b++) { cblas_trtri(arg.uplo, arg.diag, N, hB.data() + b * strideA, lda); } // enable unit check, notice unit check is not invasive, but norm check is, // unit check and norm check can not be interchanged their order if(arg.unit_check) { near_check_general(N, N, batch_count, lda, strideA, hB, hA, rel_error); } if(arg.norm_check) { hipblas_error = norm_check_general('F', N, N, lda, strideA, hB, hA, batch_count); } } if(arg.timing) { hipStream_t stream; CHECK_HIPBLAS_ERROR(hipblasGetStream(handle, &stream)); int runs = arg.cold_iters + arg.iters; for(int iter = 0; iter < runs; iter++) { if(iter == arg.cold_iters) gpu_time_used = get_time_us_sync(stream); CHECK_HIPBLAS_ERROR(hipblasTrtriStridedBatchedFn( handle, uplo, diag, N, dA, lda, strideA, dinvA, ldinvA, strideA, batch_count)); } gpu_time_used = get_time_us_sync(stream) - gpu_time_used; hipblasTrtriStridedBatchedModel{}.log_args(std::cout, arg, gpu_time_used, trtri_gflop_count(N), trtri_gbyte_count(N), hipblas_error); } return HIPBLAS_STATUS_SUCCESS; } hipBLAS-rocm-5.5.1/clients/include/tuple_helper.hpp000066400000000000000000000150351434647641600222120ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef _TUPLE_HELPER_HPP_ #define _TUPLE_HELPER_HPP_ #include #include #include #include #include #include #include /***************************************************** * Tuple helper class provides operations on tuples * *****************************************************/ class tuple_helper { /******************************************************************** * Traverse (key, value) pairs, applying functions or printing YAML * ********************************************************************/ template static void apply_pairs_impl(FUNC&& func, const TUP& tuple, std::index_sequence) { // TODO: Replace with C++17 fold expression // (func(std::get(tuple), std::get(tuple)), ...); (void)(int[]){(func(std::get(tuple), std::get(tuple)), 0)...}; } public: // Apply a function to pairs in a tuple (name1, value1, name2, value2, ...) template static void apply_pairs(FUNC&& func, const TUP& tuple) { static_assert(std::tuple_size{} % 2 == 0, "Tuple size must be even"); apply_pairs_impl(std::forward(func), tuple, std::make_index_sequence::value / 2>{}); } // Print a tuple which is expected to be (name1, value1, name2, value2, ...) template static std::ostream& print_tuple_pairs(std::ostream& os, const TUP& tuple) { static_assert(std::tuple_size{} % 2 == 0, "Tuple size must be even"); // delim starts as "{ " and becomes ", " afterwards auto print_pair = [&, delim = "{ "](const char* name, const auto& value) mutable { os << delim << std::make_pair(name, value); delim = ", "; }; // Call print_argument for each (name, value) tuple pair apply_pairs(print_pair, tuple); // Closing brace return os << " }\n"; } /********************************************************************* * Compute value hashes for (key1, value1, key2, value2, ...) 
tuples * *********************************************************************/ // Default hash for non-enum types template {}, int> = 0> static size_t hash(const T& x) { return std::hash{}(x); } // Workaround for compilers which don't implement C++14 enum hash template {}, int> = 0> static size_t hash(const T& x) { return std::hash>{}(std::underlying_type_t(x)); } // C-style string hash since std::hash does not hash them static size_t hash(const char* s) { size_t seed = 0xcbf29ce484222325; for(auto p = reinterpret_cast(s); *p; ++p) seed = (seed ^ *p) * 0x100000001b3; // FNV-1a return seed; } // For std::string consistency with above static size_t hash(const std::string& s) { return hash(s.c_str()); } // Iterate over pairs, combining hash values template static size_t hash(const TUP& tuple, std::index_sequence) { size_t seed = 0; for(size_t h : {hash(std::get(tuple))...}) seed ^= h + 0x9e3779b9 + (seed << 6) + (seed >> 2); return seed; } // Hash function class compatible with STL containers template struct hash_t { static_assert(std::tuple_size{} % 2 == 0, "Tuple size must be even"); size_t operator()(const TUP& tuple) const { return hash(tuple, std::make_index_sequence{} / 2>{}); } }; /************************************************************************ * Test (key1, value1, key2, value2, ...) tuples for equality of values * ************************************************************************/ private: // Default comparison template static bool equal(const T& x1, const T& x2) { return x1 == x2; } // C-string == C-string static bool equal(const char* s1, const char* s2) { return !strcmp(s1, s2); } // Compute equality of values in tuple (name, value) pairs template static bool equal(const TUP& t1, const TUP& t2, std::index_sequence) { // TODO: Replace with C++17 fold expression // return (equal(std::get(t1), std::get(t2)) && ...); bool ret = true; (void)(bool[]){(ret = ret && equal(std::get(t1), std::get(t2)))...}; return ret; } public: // Tuple (name, value) equality test class is compatible with STL associative containers template struct equal_t { static_assert(std::tuple_size{} % 2 == 0, "Tuple size must be even"); bool operator()(const TUP& t1, const TUP& t2) const { return equal(t1, t2, std::make_index_sequence{} / 2>{}); } }; }; #endif hipBLAS-rocm-5.5.1/clients/include/type_dispatch.hpp000066400000000000000000000236761434647641600223740ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifndef _HIPBLAS_TYPE_DISPATCH_ #define _HIPBLAS_TYPE_DISPATCH_ #include "hipblas.hpp" #include "utility.h" // ---------------------------------------------------------------------------- // Calls TEST template based on the argument types. TEST<> is expected to // return a functor which takes a const Arguments& argument. If the types do // not match a recognized type combination, then TEST is called. This // function returns the same type as TEST<...>{}(arg), usually bool or void. // ---------------------------------------------------------------------------- // Simple functions which take only one datatype // // Even if the function can take mixed datatypes, this function can handle the // cases where the types are uniform, in which case one template type argument // is passed to TEST, and the rest are assumed to match the first. template