pax_global_header00006660000000000000000000000064143610720730014515gustar00rootroot0000000000000052 comment=2d63d95d119ce93737488c466f8958f80cf0bece hipSOLVER-rocm-5.5.1/000077500000000000000000000000001436107207300141765ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/.clang-format000066400000000000000000000065421436107207300165600ustar00rootroot00000000000000# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: WebKit TabWidth: 4 IndentWidth: 4 UseTab: Never ColumnLimit: 100 # Other languages JavaScript, Proto --- Language: Cpp # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: Cpp11 AccessModifierOffset: -4 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: false AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: { AfterCaseLabel: 'true' AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' AfterFunction : 'true' AfterNamespace : 'true' AfterStruct : 'true' AfterUnion : 'true' BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' # AfterExternBlock : 'true' 
} #BreakAfterJavaFieldAnnotations: true #BreakBeforeInheritanceComma: false #BreakBeforeBinaryOperators: None #BreakBeforeTernaryOperators: true #BreakConstructorInitializersBeforeComma: true #BreakStringLiterals: true CommentPragmas: '^ IWYU pragma:' #CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true SpaceBeforeCpp11BracedList: false DerivePointerAlignment: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IndentCaseLabels: false IndentPPDirectives: None #FixNamespaceComments: true IndentWrappedFunctionNames: true KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' #JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 NamespaceIndentation: All ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: Never SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false #SpaceAfterTemplateKeyword: true #SpaceBeforeInheritanceColon: true #SortUsingDeclarations: true SortIncludes: true # Comments are for developers, they should arrange them ReflowComments: false #IncludeBlocks: Preserve --- hipSOLVER-rocm-5.5.1/.gitattributes000066400000000000000000000004471436107207300170760ustar00rootroot00000000000000# By default, convert all text files to Unix line endings on check-in # and native line endings on check-out * text=auto # Override the default behavior for specific files *.sh text eol=lf 
*.bat text eol=crlf # Reduce merge conflicts in changelog /CHANGELOG.md merge=union hipSOLVER-rocm-5.5.1/.githooks/000077500000000000000000000000001436107207300161035ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/.githooks/install000077500000000000000000000002221436107207300174730ustar00rootroot00000000000000#!/usr/bin/env bash cd $(git rev-parse --git-dir) cd hooks echo "Installing hooks..." ln -s ../../.githooks/pre-commit pre-commit echo "Done!" hipSOLVER-rocm-5.5.1/.githooks/pre-commit000077500000000000000000000044301436107207300201060ustar00rootroot00000000000000#!/bin/bash # # This pre-commit hook checks if any versions of clang-format # are installed, and if so, uses the installed version to format # the staged changes. export PATH=/opt/rocm/llvm/bin:/usr/bin:/bin # Redirect stdout to stderr. exec >&2 # Do everything from top - level cd $(git rev-parse --show-toplevel) if git rev-parse --verify HEAD >/dev/null 2>&1; then against=HEAD else # Initial commit: diff against an empty tree object against=4cff67984886e65e3f7d0fca2c1b6019c04ef493 fi if [[ "$1" == "--reformat" ]]; then files=$(git ls-files --exclude-standard) else files=$(git diff-index --cached --name-only $against) fi [[ -z "$files" ]] && exit # Change the copyright date at the top of any text files for file in $files; do echo "Processing copyright dates in $file" if [[ -e $file ]]; then /usr/bin/perl -pi -e 'INIT { exit 1 if !-f $ARGV[0] || -B $ARGV[0]; $year = (localtime)[5] + 1900 } s/^([*\/#[:space:]]*)Copyright\s+(?:\(C\)\s*)?(\d+)(?:\s*-\s*\d+)?/qq($1Copyright \(C\) $2@{[$year != $2 ? "-$year" : ""]})/ie if $. 
< 10' "$file" && git add -u "$file" fi done # do the formatting for file in $files; do if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.in$|\.txt$|\.yaml$|\.yml$|\.sh$|\.py$|\.pl$|\.cmake$|\.md$|\.rst$|\.groovy$|\.ini$|\.awk$|\.csv$'; then echo "Processing line endings in $file" sed -i -e 's/[[:space:]]*$//' "$file" # Remove whitespace at end of lines sed -i -e '$a\' "$file" # Add missing newline to end of file echo "Converting non-ASCII characters to ASCII equivalents in $file" # Convert UTF8 non-ASCII to ASCII temp=$(mktemp) [[ -w $temp ]] || exit iconv -s -f utf-8 -t ascii//TRANSLIT "$file" > "$temp" || exit chmod --reference="$file" "$temp" || exit mv -f "$temp" "$file" || exit git add -u "$file" fi done # if clang-format exists, run it on C/C++ files if command -v clang-format >/dev/null; then for file in $files; do if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.h\.in$|\.hpp\.in$|\.cpp\.in$'; then echo "clang-format $file" clang-format -i -style=file "$file" git add -u "$file" fi done fi hipSOLVER-rocm-5.5.1/.github/000077500000000000000000000000001436107207300155365ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/.github/CODEOWNERS000066400000000000000000000000461436107207300171310ustar00rootroot00000000000000* @jzuniga-amd @tfalders @cgmb @qjojo hipSOLVER-rocm-5.5.1/.github/CONTRIBUTING.md000066400000000000000000000015271436107207300177740ustar00rootroot00000000000000## Contribution License Agreement 1. The code I am contributing is mine, and I have the right to license it. 2. By submitting a pull request for this project I am granting you a license to distribute said code under the MIT License for the project. ## How to contribute Our code contriubtion guidelines closely follows the model of [GitHub pull-requests](https://help.github.com/articles/using-pull-requests/). 
This repository follows the [git flow](http://nvie.com/posts/a-successful-git-branching-model/) workflow, which dictates a /master branch where releases are cut, and a /develop branch which serves as an integration branch for new code. ## Pull-request guidelines * target the **develop** branch for integration * ensure code builds successfully. * do not break existing test cases * new functionality will only be merged with new unit tests hipSOLVER-rocm-5.5.1/.github/workflows/000077500000000000000000000000001436107207300175735ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/.github/workflows/docs.yaml000066400000000000000000000044631436107207300214160ustar00rootroot00000000000000name: Upload to the upload server # Controls when the workflow will run on: push: branches: [develop, master] tags: - rocm-5.* release: types: [published] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: # This workflow contains a single job called "build" build: # The type of runner that the job will run on runs-on: ubuntu-latest # Steps represent a sequence of tasks that will be executed as part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - name: getting branch name shell: bash run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" id: branch_name - name: getting tag name shell: bash run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})" id: tag_name - name: zipping files run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . 
-x '*.git*' '*.idea*' - name: echo-step run: echo "${{ github.event.release.target_commitish }}" - name: uploading archive to prod if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.PROD_UPLOAD_URL }}' args: '-o ConnectTimeout=5' - name: uploading archive to staging if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.STG_UPLOAD_URL }}' args: '-o ConnectTimeout=5' hipSOLVER-rocm-5.5.1/.gitignore000066400000000000000000000005461436107207300161730ustar00rootroot00000000000000# Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod *.smod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app # Editors .vscode # build-in-source directory build/ docBin/ # emacs temporary/backup files .\#* \#*\# *~ hipSOLVER-rocm-5.5.1/.jenkins/000077500000000000000000000000001436107207300157155ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/.jenkins/common.groovy000066400000000000000000000050301436107207300204520ustar00rootroot00000000000000// This file is for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
def runCompileCommand(platform, project, jobName, boolean sameOrg=false) { project.paths.construct_build_prefix() def getDependenciesCommand = "" if (project.installLibraryDependenciesFromCI) { project.libraryDependencies.each { libraryName -> getDependenciesCommand += auxiliary.getLibrary(libraryName, platform.jenkinsLabel, null, sameOrg) } } String debug = project.buildName.contains('Debug') ? '-g' : '' String centos = platform.jenkinsLabel.contains('centos') ? 'source scl_source enable devtoolset-7' : ':' def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} ${getDependenciesCommand} ${centos} LD_LIBRARY_PATH=/opt/rocm/lib ${project.paths.build_command} ${debug} """ platform.runCommand(this, command) } def runTestCommand(platform, project) { String buildType = project.buildName.contains('Debug') ? "debug" : "release" String testExe = project.buildName.contains('Debug') ? "hipsolver-test-d" : "hipsolver-test" def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix}/build/${buildType}/clients/staging LD_LIBRARY_PATH=/opt/rocm/lib GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./${testExe} --gtest_output=xml --gtest_color=yes """ platform.runCommand(this, command) junit "${project.paths.project_build_prefix}/build/${buildType}/clients/staging/*.xml" } def runPackageCommand(platform, project, jobName, label='') { def command label = label != '' ? '-' + label.toLowerCase() : '' String ext = platform.jenkinsLabel.contains('ubuntu') ? "deb" : "rpm" String dir = project.buildName.contains('Debug') ? "debug" : "release" command = """ set -x cd ${project.paths.project_build_prefix}/build/${dir} make package mkdir -p package if [ ! 
-z "$label" ] then for f in hipsolver*.$ext do mv "\$f" "hipsolver${label}-\${f#*-}" done fi mv *.${ext} package/ """ platform.runCommand(this, command) platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/build/${dir}/package/*.${ext}""") } return this hipSOLVER-rocm-5.5.1/.jenkins/debug.groovy000066400000000000000000000054631436107207300202620ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipSOLVER', 'Debug') // customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocBLAS-internal', 'rocSOLVER'] prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList 
i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') } hipSOLVER-rocm-5.5.1/.jenkins/multicompiler.groovy000066400000000000000000000062631436107207300220600ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipSOLVER', 'MultiCompiler') //customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocBLAS-internal', 'rocSOLVER'] prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++' String hipClangBuildCommand = './install.sh -c --compiler=/opt/rocm/hip/bin/hipcc' String clangBuildCommand = './install.sh -c --compiler=clang++' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') setupCI(urlJobName, jobNameList, hipClangBuildCommand, runCI, 'hip-clang') } hipSOLVER-rocm-5.5.1/.jenkins/precheckin-cuda.groovy000066400000000000000000000046551436107207300222230ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipSOLVER', 'PreCheckin-CUDA') //customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = [] prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, null) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI(['ubuntu20-cuda11':['anycuda']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = [] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = [:] propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++ --cuda' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') } hipSOLVER-rocm-5.5.1/.jenkins/precheckin.groovy000066400000000000000000000057051436107207300213060ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipSOLVER', 'PreCheckin') //customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocBLAS-internal', 'rocSOLVER'] prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') } hipSOLVER-rocm-5.5.1/.jenkins/staticanalysis.groovy000066400000000000000000000034521436107207300222230ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCompileCommand(platform, project, jobName, boolean debug=false) { project.paths.construct_build_prefix() def command = """#!/usr/bin/env bash set -x ${project.paths.project_build_prefix}/docs/run_doc.sh """ try { platform.runCommand(this, command) } catch(e) { throw e } publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: "${project.paths.project_build_prefix}/docs/build/html", reportFiles: "index.html", reportName: "Documentation", reportTitles: "Documentation"]) } def runCI = { nodeDetails, jobName-> def prj = new rocProject('hipSOLVER', 'Static Analysis') // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = true boolean staticAnalysis = true def compileCommand = { platform, project-> runCompileCommand(platform, project, jobName, false) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, null, null, staticAnalysis) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 2')])])) stage(urlJobName) { runCI([ubuntu20:['cpu']], urlJobName) } } hipSOLVER-rocm-5.5.1/.jenkins/staticlibrary.groovy000066400000000000000000000046651436107207300220530ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('hipSOLVER', 'StaticLibrary') prj.paths.build_command = './install.sh -cd --static -p /opt/rocm/lib/cmake' prj.libraryDependencies = ['rocBLAS-internal', 'rocSOLVER'] prj.defaults.ccache = true // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = true def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName, true) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx900']], urlJobName) } } } hipSOLVER-rocm-5.5.1/.readthedocs.yaml000066400000000000000000000004171436107207300174270ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/source/conf.py formats: all python: version: "3.7" install: - requirements: docs/source/requirements.txt hipSOLVER-rocm-5.5.1/CHANGELOG.md000066400000000000000000000245631436107207300160210ustar00rootroot00000000000000# Change Log for hipSOLVER Full documentation for hipSOLVER is available at [hipsolver.readthedocs.io](https://hipsolver.readthedocs.io/en/latest/). ## hipSOLVER 1.7.0 for ROCm 5.5.0 ### Added - Added functions - gesvdj - hipsolverSgesvdj_bufferSize, hipsolverDgesvdj_bufferSize, hipsolverCgesvdj_bufferSize, hipsolverZgesvdj_bufferSize - hipsolverSgesvdj, hipsolverDgesvdj, hipsolverCgesvdj, hipsolverZgesvdj - gesvdjBatched - hipsolverSgesvdjBatched_bufferSize, hipsolverDgesvdjBatched_bufferSize, hipsolverCgesvdjBatched_bufferSize, hipsolverZgesvdjBatched_bufferSize - hipsolverSgesvdjBatched, hipsolverDgesvdjBatched, hipsolverCgesvdjBatched, hipsolverZgesvdjBatched ## hipSOLVER 1.6.0 for ROCm 5.4.0 ### Added - Added compatibility-only functions - gesvdaStridedBatched - hipsolverDnSgesvdaStridedBatched_bufferSize, hipsolverDnDgesvdaStridedBatched_bufferSize, hipsolverDnCgesvdaStridedBatched_bufferSize, hipsolverDnZgesvdaStridedBatched_bufferSize - hipsolverDnSgesvdaStridedBatched, hipsolverDnDgesvdaStridedBatched, hipsolverDnCgesvdaStridedBatched, hipsolverDnZgesvdaStridedBatched ## hipSOLVER 1.5.0 for ROCm 5.3.0 ### Added - Added functions - syevj - hipsolverSsyevj_bufferSize, hipsolverDsyevj_bufferSize, hipsolverCheevj_bufferSize, hipsolverZheevj_bufferSize - 
hipsolverSsyevj, hipsolverDsyevj, hipsolverCheevj, hipsolverZheevj - syevjBatched - hipsolverSsyevjBatched_bufferSize, hipsolverDsyevjBatched_bufferSize, hipsolverCheevjBatched_bufferSize, hipsolverZheevjBatched_bufferSize - hipsolverSsyevjBatched, hipsolverDsyevjBatched, hipsolverCheevjBatched, hipsolverZheevjBatched - sygvj - hipsolverSsygvj_bufferSize, hipsolverDsygvj_bufferSize, hipsolverChegvj_bufferSize, hipsolverZhegvj_bufferSize - hipsolverSsygvj, hipsolverDsygvj, hipsolverChegvj, hipsolverZhegvj - Added compatibility-only functions - syevdx/heevdx - hipsolverDnSsyevdx_bufferSize, hipsolverDnDsyevdx_bufferSize, hipsolverDnCheevdx_bufferSize, hipsolverDnZheevdx_bufferSize - hipsolverDnSsyevdx, hipsolverDnDsyevdx, hipsolverDnCheevdx, hipsolverDnZheevdx - sygvdx/hegvdx - hipsolverDnSsygvdx_bufferSize, hipsolverDnDsygvdx_bufferSize, hipsolverDnChegvdx_bufferSize, hipsolverDnZhegvdx_bufferSize - hipsolverDnSsygvdx, hipsolverDnDsygvdx, hipsolverDnChegvdx, hipsolverDnZhegvdx - Added --mem_query option to hipsolver-bench, which will print the amount of device memory workspace required by the function. ### Changed - The rocSOLVER backend will now set `info` to zero if rocSOLVER does not reference `info`. (Applies to orgbr/ungbr, orgqr/ungqr, orgtr/ungtr, ormqr/unmqr, ormtr/unmtr, gebrd, geqrf, getrs, potrs, and sytrd/hetrd). - gesvdj will no longer require extra workspace to transpose `V` when `jobz` is `HIPSOLVER_EIG_MODE_VECTOR` and `econ` is 1. ### Fixed - Fixed Fortran return value declarations within hipsolver_module.f90 - Fixed gesvdj_bufferSize returning `HIPSOLVER_STATUS_INVALID_VALUE` when `jobz` is `HIPSOLVER_EIG_MODE_NOVECTOR` and 1 <= `ldv` < `n` - Fixed gesvdj returning `HIPSOLVER_STATUS_INVALID_VALUE` when `jobz` is `HIPSOLVER_EIG_MODE_VECTOR`, `econ` is 1, and `m` < `n` ## hipSOLVER 1.4.0 for ROCm 5.2.0 ### Added - Package generation for test and benchmark executables on all supported OSes using CPack. 
- File/Folder Reorg - Added File/Folder Reorg Changes with backward compatibility support using ROCM-CMAKE wrapper functions. ### Fixed - Fixed the ReadTheDocs documentation generation. ## hipSOLVER 1.3.0 for ROCm 5.1.0 ### Added - Added functions - gels - hipsolverSSgels_bufferSize, hipsolverDDgels_bufferSize, hipsolverCCgels_bufferSize, hipsolverZZgels_bufferSize - hipsolverSSgels, hipsolverDDgels, hipsolverCCgels, hipsolverZZgels - Added library version and device information to hipsolver-test output. - Added compatibility API with hipsolverDn prefix. - Added compatibility-only functions - gesvdj - hipsolverDnSgesvdj_bufferSize, hipsolverDnDgesvdj_bufferSize, hipsolverDnCgesvdj_bufferSize, hipsolverDnZgesvdj_bufferSize - hipsolverDnSgesvdj, hipsolverDnDgesvdj, hipsolverDnCgesvdj, hipsolverDnZgesvdj - gesvdjBatched - hipsolverDnSgesvdjBatched_bufferSize, hipsolverDnDgesvdjBatched_bufferSize, hipsolverDnCgesvdjBatched_bufferSize, hipsolverDnZgesvdjBatched_bufferSize - hipsolverDnSgesvdjBatched, hipsolverDnDgesvdjBatched, hipsolverDnCgesvdjBatched, hipsolverDnZgesvdjBatched - syevj - hipsolverDnSsyevj_bufferSize, hipsolverDnDsyevj_bufferSize, hipsolverDnCheevj_bufferSize, hipsolverDnZheevj_bufferSize - hipsolverDnSsyevj, hipsolverDnDsyevj, hipsolverDnCheevj, hipsolverDnZheevj - syevjBatched - hipsolverDnSsyevjBatched_bufferSize, hipsolverDnDsyevjBatched_bufferSize, hipsolverDnCheevjBatched_bufferSize, hipsolverDnZheevjBatched_bufferSize - hipsolverDnSsyevjBatched, hipsolverDnDsyevjBatched, hipsolverDnCheevjBatched, hipsolverDnZheevjBatched - sygvj - hipsolverDnSsygvj_bufferSize, hipsolverDnDsygvj_bufferSize, hipsolverDnChegvj_bufferSize, hipsolverDnZhegvj_bufferSize - hipsolverDnSsygvj, hipsolverDnDsygvj, hipsolverDnChegvj, hipsolverDnZhegvj ### Changed - The rocSOLVER backend now allows hipsolverXXgels and hipsolverXXgesv to be called in-place when B == X. - The rocSOLVER backend now allows rwork to be passed as a null pointer to hipsolverXgesvd. 
### Fixed - bufferSize functions will now return HIPSOLVER_STATUS_NOT_INITIALIZED instead of HIPSOLVER_STATUS_INVALID_VALUE when both handle and lwork are null. - Fixed rare memory allocation failure in syevd/heevd and sygvd/hegvd caused by improper workspace array allocation outside of rocSOLVER. ## hipSOLVER 1.2.0 for ROCm 5.0.0 ### Added - Added functions - sytrf - hipsolverSsytrf_bufferSize, hipsolverDsytrf_bufferSize, hipsolverCsytrf_bufferSize, hipsolverZsytrf_bufferSize - hipsolverSsytrf, hipsolverDsytrf, hipsolverCsytrf, hipsolverZsytrf ### Fixed - Fixed use of incorrect `HIP_PATH` when building from source (#40). Thanks [@jakub329homola](https://github.com/jakub329homola)! ## hipSOLVER 1.1.0 for ROCm 4.5.0 ### Added - Added functions - gesv - hipsolverSSgesv_bufferSize, hipsolverDDgesv_bufferSize, hipsolverCCgesv_bufferSize, hipsolverZZgesv_bufferSize - hipsolverSSgesv, hipsolverDDgesv, hipsolverCCgesv, hipsolverZZgesv - potrs - hipsolverSpotrs_bufferSize, hipsolverDpotrs_bufferSize, hipsolverCpotrs_bufferSize, hipsolverZpotrs_bufferSize - hipsolverSpotrs, hipsolverDpotrs, hipsolverCpotrs, hipsolverZpotrs - potrsBatched - hipsolverSpotrsBatched_bufferSize, hipsolverDpotrsBatched_bufferSize, hipsolverCpotrsBatched_bufferSize, hipsolverZpotrsBatched_bufferSize - hipsolverSpotrsBatched, hipsolverDpotrsBatched, hipsolverCpotrsBatched, hipsolverZpotrsBatched - potri - hipsolverSpotri_bufferSize, hipsolverDpotri_bufferSize, hipsolverCpotri_bufferSize, hipsolverZpotri_bufferSize - hipsolverSpotri, hipsolverDpotri, hipsolverCpotri, hipsolverZpotri - orgbr/ungbr - hipsolverSorgbr_bufferSize, hipsolverDorgbr_bufferSize, hipsolverCungbr_bufferSize, hipsolverZungbr_bufferSize - hipsolverSorgbr, hipsolverDorgbr, hipsolverCungbr, hipsolverZungbr - orgqr/ungqr - hipsolverSorgqr_bufferSize, hipsolverDorgqr_bufferSize, hipsolverCungqr_bufferSize, hipsolverZungqr_bufferSize - hipsolverSorgqr, hipsolverDorgqr, hipsolverCungqr, hipsolverZungqr - orgtr/ungtr - 
hipsolverSorgtr_bufferSize, hipsolverDorgtr_bufferSize, hipsolverCungtr_bufferSize, hipsolverZungtr_bufferSize - hipsolverSorgtr, hipsolverDorgtr, hipsolverCungtr, hipsolverZungtr - ormqr/unmqr - hipsolverSormqr_bufferSize, hipsolverDormqr_bufferSize, hipsolverCunmqr_bufferSize, hipsolverZunmqr_bufferSize - hipsolverSormqr, hipsolverDormqr, hipsolverCunmqr, hipsolverZunmqr - ormtr/unmtr - hipsolverSormtr_bufferSize, hipsolverDormtr_bufferSize, hipsolverCunmtr_bufferSize, hipsolverZunmtr_bufferSize - hipsolverSormtr, hipsolverDormtr, hipsolverCunmtr, hipsolverZunmtr - gebrd - hipsolverSgebrd_bufferSize, hipsolverDgebrd_bufferSize, hipsolverCgebrd_bufferSize, hipsolverZgebrd_bufferSize - hipsolverSgebrd, hipsolverDgebrd, hipsolverCgebrd, hipsolverZgebrd - geqrf - hipsolverSgeqrf_bufferSize, hipsolverDgeqrf_bufferSize, hipsolverCgeqrf_bufferSize, hipsolverZgeqrf_bufferSize - hipsolverSgeqrf, hipsolverDgeqrf, hipsolverCgeqrf, hipsolverZgeqrf - gesvd - hipsolverSgesvd_bufferSize, hipsolverDgesvd_bufferSize, hipsolverCgesvd_bufferSize, hipsolverZgesvd_bufferSize - hipsolverSgesvd, hipsolverDgesvd, hipsolverCgesvd, hipsolverZgesvd - getrs - hipsolverSgetrs_bufferSize, hipsolverDgetrs_bufferSize, hipsolverCgetrs_bufferSize, hipsolverZgetrs_bufferSize - hipsolverSgetrs, hipsolverDgetrs, hipsolverCgetrs, hipsolverZgetrs - potrf - hipsolverSpotrf_bufferSize, hipsolverDpotrf_bufferSize, hipsolverCpotrf_bufferSize, hipsolverZpotrf_bufferSize - hipsolverSpotrf, hipsolverDpotrf, hipsolverCpotrf, hipsolverZpotrf - potrfBatched - hipsolverSpotrfBatched_bufferSize, hipsolverDpotrfBatched_bufferSize, hipsolverCpotrfBatched_bufferSize, hipsolverZpotrfBatched_bufferSize - hipsolverSpotrfBatched, hipsolverDpotrfBatched, hipsolverCpotrfBatched, hipsolverZpotrfBatched - syevd/heevd - hipsolverSsyevd_bufferSize, hipsolverDsyevd_bufferSize, hipsolverCheevd_bufferSize, hipsolverZheevd_bufferSize - hipsolverSsyevd, hipsolverDsyevd, hipsolverCheevd, hipsolverZheevd - sygvd/hegvd - 
hipsolverSsygvd_bufferSize, hipsolverDsygvd_bufferSize, hipsolverChegvd_bufferSize, hipsolverZhegvd_bufferSize - hipsolverSsygvd, hipsolverDsygvd, hipsolverChegvd, hipsolverZhegvd - sytrd/hetrd - hipsolverSsytrd_bufferSize, hipsolverDsytrd_bufferSize, hipsolverChetrd_bufferSize, hipsolverZhetrd_bufferSize - hipsolverSsytrd, hipsolverDsytrd, hipsolverChetrd, hipsolverZhetrd - getrf - hipsolverSgetrf_bufferSize, hipsolverDgetrf_bufferSize, hipsolverCgetrf_bufferSize, hipsolverZgetrf_bufferSize - hipsolverSgetrf, hipsolverDgetrf, hipsolverCgetrf, hipsolverZgetrf - auxiliary - hipsolverCreate, hipsolverDestroy - hipsolverSetStream, hipsolverGetStream ### Changed - hipSOLVER functions will now return HIPSOLVER_STATUS_INVALID_ENUM or HIPSOLVER_STATUS_UNKNOWN status codes rather than throw exceptions. - hipsolverXgetrf functions now take lwork as an argument. ### Removed - Removed unused HIPSOLVER_FILL_MODE_FULL enum value. - Removed hipsolverComplex and hipsolverDoubleComplex from the library. Use hipFloatComplex and hipDoubleComplex instead. hipSOLVER-rocm-5.5.1/CMakeLists.txt000066400000000000000000000257331436107207300167500ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. 
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
# PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# ########################################################################

cmake_minimum_required( VERSION 3.7 )

# We use C++14 features, this will add compile option: -std=c++14
set( CMAKE_CXX_STANDARD 14 )

# Consider removing this in the future
# This should appear before the project command, because it does not use FORCE
# NOTE(review): PROJECT_BINARY_DIR is normally populated by project(), which has
# not run yet at this point, so the WIN32 default below presumably resolves to
# "/package" - confirm. The WIN32 packaging section further down overrides it
# with FORCE anyway.
if( WIN32 )
  set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" )
else( )
  set( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories" )
endif( )

# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE )
  set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." )
endif()

# Fall back to gfortran when neither -DCMAKE_Fortran_COMPILER nor the FC
# environment variable selects a Fortran compiler.
if ( NOT DEFINED CMAKE_Fortran_COMPILER AND NOT DEFINED ENV{FC} )
  set( CMAKE_Fortran_COMPILER "gfortran" )
endif()

project( hipsolver LANGUAGES CXX )

if( UNIX )
  enable_language( Fortran )
endif( )

# This finds the rocm-cmake project, and installs it if not found
# rocm-cmake contains common cmake code for rocm projects to help setup and install
set( PROJECT_EXTERN_DIR ${CMAKE_CURRENT_BINARY_DIR}/extern )
find_package( ROCM 0.7.3 CONFIG QUIET PATHS /opt/rocm )
if( NOT ROCM_FOUND )
  set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")
  set(rocm_cmake_url "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip")
  set(rocm_cmake_path "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}")
  set(rocm_cmake_archive "${rocm_cmake_path}.zip")
  file(DOWNLOAD "${rocm_cmake_url}" "${rocm_cmake_archive}" STATUS status LOG log)

  # file(DOWNLOAD ... STATUS) yields a two-element list: numeric code, then message.
  list(GET status 0 status_code)
  list(GET status 1 status_string)

  if(status_code EQUAL 0)
    message(STATUS "downloading... done")
  else()
    message(FATAL_ERROR "error: downloading\n'${rocm_cmake_url}' failed status_code: ${status_code} status_string: ${status_string} log: ${log}\n")
  endif()

  # Unpack, configure and install the downloaded rocm-cmake under PROJECT_EXTERN_DIR.
  execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf "${rocm_cmake_archive}"
    WORKING_DIRECTORY ${PROJECT_EXTERN_DIR})
  execute_process( COMMAND ${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=${PROJECT_EXTERN_DIR}/rocm-cmake .
    WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag} )
  execute_process( COMMAND ${CMAKE_COMMAND} --build rocm-cmake-${rocm_cmake_tag} --target install
    WORKING_DIRECTORY ${PROJECT_EXTERN_DIR})

  find_package( ROCM 0.7.3 REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake )
endif( )

include( ROCMSetupVersion )
include( ROCMCreatePackage )
include( ROCMInstallTargets )
include( ROCMPackageConfigHelpers )
include( ROCMInstallSymlinks )
include( ROCMClients )
include( ROCMHeaderWrapper )

set ( VERSION_STRING "1.7.0" )
rocm_setup_version( VERSION ${VERSION_STRING} )

if( NOT DEFINED ENV{HIP_PATH})
  set( HIP_PATH "/opt/rocm/hip" )
else( )
  set (HIP_PATH $ENV{HIP_PATH} )
endif( )

# Append our library helper cmake path and the cmake path for hip (for convenience)
# Users may override HIP path by specifying their own in CMAKE_MODULE_PATH
list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${ROCM_PATH}/lib/cmake/hip ${HIP_PATH}/cmake )

# NOTE: workaround until hip cmake modules fixes symlink logic in their config files; remove when fixed
list( APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/llvm /opt/rocm/hip )

option( BUILD_VERBOSE "Output additional build information" OFF )

# BUILD_SHARED_LIBS is a cmake built-in; we make it an explicit option such that it shows in cmake-gui
option( BUILD_SHARED_LIBS "Build hipSOLVER as a shared library" ON )

option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF)

# Interface target that carries the optional sanitizer flags.
add_library(hipsolver-common INTERFACE)
if(BUILD_ADDRESS_SANITIZER)
  target_compile_options(hipsolver-common INTERFACE
    -fsanitize=address
    -shared-libasan
  )
  target_link_options(hipsolver-common INTERFACE
    -fsanitize=address
    -shared-libasan
    -fuse-ld=lld
  )
endif()

# Runtime correctness checks default to level 1 for Debug builds, 0 otherwise.
if( CMAKE_BUILD_TYPE STREQUAL "Debug" )
  set( DEFAULT_ARMOR_LEVEL 1 )
else( )
  set( DEFAULT_ARMOR_LEVEL 0 )
endif( )
set( ARMOR_LEVEL "${DEFAULT_ARMOR_LEVEL}" CACHE STRING "Enables increasingly expensive runtime correctness checks" )
include( armor-config )

# Find CUDA if the user wants a CUDA version.
option(USE_CUDA "Look for CUDA and use that as a backend if found" OFF)
if (USE_CUDA)
  find_package( CUDA REQUIRED )
endif()

# Hip headers required of all clients; clients use hip to allocate device memory
if( USE_CUDA)
  find_package( HIP MODULE REQUIRED )
else( )
  find_package( hip REQUIRED CONFIG PATHS ${HIP_PATH} ${ROCM_PATH} /opt/rocm )
endif( )

if( USE_CUDA )
  list( APPEND HIP_INCLUDE_DIRS "${HIP_ROOT_DIR}/include" )
endif( )

# FOR OPTIONAL CODE COVERAGE
option(BUILD_CODE_COVERAGE "Build hipSOLVER with code coverage enabled" OFF)
if(BUILD_CODE_COVERAGE)
  add_compile_options(-fprofile-arcs -ftest-coverage)
  add_link_options(--coverage)
endif()

include(CMakeDependentOption)

cmake_dependent_option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY
  "Build with file/folder reorg backward compatibility enabled" ON "NOT WIN32" OFF)
if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY)
  rocm_wrap_header_dir(
    ${CMAKE_SOURCE_DIR}/library/include
    PATTERNS "*.h"
    GUARDS SYMLINK WRAPPER
    WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR}
  )
endif()

if( WIN32 )
  add_compile_definitions( WIN32_LEAN_AND_MEAN _CRT_SECURE_NO_WARNINGS NOMINMAX )
endif()

add_subdirectory( library )

include( clients/cmake/build-options.cmake )

# Build clients of the library
if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
  if(NOT CLIENTS_OS)
    rocm_set_os_id(CLIENTS_OS)
    string(TOLOWER "${CLIENTS_OS}" CLIENTS_OS)
    rocm_read_os_release(CLIENTS_OS_VERSION VERSION_ID)
  endif()

  # Pick the libgfortran package name that the client packages depend on.
  set(GFORTRAN_RPM "libgfortran4")
  set(GFORTRAN_DEB "libgfortran4")
  if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel")
    if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8")
      set(GFORTRAN_RPM "libgfortran")
    endif()
  elseif(CLIENTS_OS STREQUAL "ubuntu" AND CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "20.04")
    set(GFORTRAN_DEB "libgfortran5")
  endif()

  rocm_package_setup_component(clients)
  if( UNIX )
    set(DEP_ARGS DEPENDS RPM "${GFORTRAN_RPM}" DEB "${GFORTRAN_DEB}")
  endif()
  if( BUILD_CLIENTS_TESTS )
    rocm_package_setup_client_component(tests ${DEP_ARGS})
  endif()
  if( BUILD_CLIENTS_BENCHMARKS )
    rocm_package_setup_client_component(benchmarks ${DEP_ARGS})
  endif()

  add_subdirectory( clients )
endif( )

# Package specific CPACK vars
if( NOT USE_CUDA )
  rocm_package_add_dependencies(DEPENDS "rocblas >= 2.47.0" "rocsolver >= 3.21.0")
endif( )

set( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" )
set( CPACK_RPM_PACKAGE_LICENSE "MIT" )

if(WIN32)
  set(CPACK_SOURCE_GENERATOR "ZIP")
  set(CPACK_GENERATOR "ZIP")
  set(CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE)
  set(INSTALL_PREFIX "C:/hipSDK")
  set(CPACK_SET_DESTDIR OFF)
  set(CPACK_PACKAGE_INSTALL_DIRECTORY "C:/hipSDK")
  set(CPACK_PACKAGING_INSTALL_PREFIX "")
  set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
else()
  if(NOT CPACK_PACKAGING_INSTALL_PREFIX)
    set(CPACK_PACKAGING_INSTALL_PREFIX "${ROCM_PATH}")
  endif()
endif()

set( CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" )

# Give hipsolver compiled for CUDA backend a different name
if( NOT USE_CUDA )
  set( package_name hipsolver )
else( )
  set( package_name hipsolver-alt )
endif( )

set( HIPSOLVER_CONFIG_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" CACHE PATH "Path placed into ldconfig file" )

# NOTE(review): the MAINTAINER string ends in a trailing space; a contact address
# may have been lost from this copy of the file - confirm against upstream.
rocm_create_package(
  NAME ${package_name}
  DESCRIPTION "Radeon Open Compute LAPACK marshalling library"
  MAINTAINER "hipSOLVER Maintainer "
  LDCONFIG
  LDCONFIG_DIR ${HIPSOLVER_CONFIG_DIR}
)

# ADDITIONAL TARGETS FOR CODE COVERAGE
if(BUILD_CODE_COVERAGE)
  #
  # > make coverage_cleanup (clean coverage related files.)
  # > make coverage GTEST_FILTER=<>
  # will run:
  # > make coverage_analysis GTEST_FILTER=<> (analyze tests)
  # > make coverage_output (generate html documentation)
  #
  #
  set(coverage_test ./clients/staging/hipsolver-test)
  if (CMAKE_BUILD_TYPE STREQUAL "Debug")
    set(coverage_test ./clients/staging/hipsolver-test-d)
  endif()

  #
  # Run coverage analysis
  #
  add_custom_target(coverage_analysis
    COMMAND echo Coverage GTEST_FILTER=\${GTEST_FILTER}
    COMMAND ${coverage_test} --gtest_filter=\"\${GTEST_FILTER}\"
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
  )

  add_dependencies(coverage_analysis hipsolver)

  #
  # Prepare coverage output
  # This little script is generated because the option '--gcov-tool ' of lcov cannot take arguments.
  #
  add_custom_target(coverage_output
    DEPENDS coverage_analysis
    COMMAND mkdir -p lcoverage
    COMMAND echo "\\#!/bin/bash" > llvm-gcov.sh
    COMMAND echo "\\# THIS FILE HAS BEEN GENERATED" >> llvm-gcov.sh
    COMMAND printf "exec /opt/rocm/llvm/bin/llvm-cov gcov $$\\@" >> llvm-gcov.sh
    COMMAND chmod +x llvm-gcov.sh
  )

  #
  # Generate coverage output.
  #
  add_custom_command(TARGET coverage_output
    COMMAND lcov --directory . --base-directory . --gcov-tool ${CMAKE_BINARY_DIR}/llvm-gcov.sh --capture -o lcoverage/raw_main_coverage.info
    COMMAND lcov --remove lcoverage/raw_main_coverage.info "'/opt/*'" "'/usr/*'" -o lcoverage/main_coverage.info
    COMMAND genhtml --ignore-errors source lcoverage/main_coverage.info --output-directory lcoverage
  )

  add_custom_target(coverage DEPENDS coverage_output)

  #
  # Coverage cleanup
  #
  # The glob is quoted and the command is VERBATIM so that find, not the shell,
  # interprets "*.gcda"; unquoted, the shell could expand it against files that
  # happen to sit directly in the working directory.
  add_custom_target(coverage_cleanup
    COMMAND find ${CMAKE_BINARY_DIR} -name "*.gcda" -delete
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
    VERBATIM
  )
endif()
hipSOLVER-rocm-5.5.1/LICENSE.md000066400000000000000000000021021436107207300155750ustar00rootroot00000000000000MIT License Copyright (C) 2020-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. hipSOLVER-rocm-5.5.1/README.md000066400000000000000000000055331436107207300154630ustar00rootroot00000000000000# hipSOLVER hipSOLVER is a LAPACK marshalling library, with multiple supported backends. It sits between the application and a 'worker' LAPACK library, marshalling inputs into the backend library and marshalling results back to the application. hipSOLVER exports an interface that does not require the client to change, regardless of the chosen backend. Currently, hipSOLVER supports [rocSOLVER](https://github.com/ROCmSoftwarePlatform/rocSOLVER) and [cuSOLVER](https://developer.nvidia.com/cusolver) as backends. ## Documentation For a detailed description of the hipSOLVER library, its implemented routines, the installation process and user guide, see the [hipSOLVER documentation](https://hipsolver.readthedocs.io/en/latest/). 
## Quickstart Build To download the hipSOLVER source code, clone this repository with the command: git clone https://github.com/ROCmSoftwarePlatform/hipSOLVER.git hipSOLVER requires either cuSOLVER or rocSOLVER + rocBLAS to be installed on the system. Once these are installed, the following commands will build hipSOLVER and install to `/opt/rocm`: cd hipSOLVER ./install.sh -i Once installed, hipSOLVER can be used just like any other library with a C API. The header file will need to be included in the user code, and the hipSOLVER library will become a link-time and run-time dependency for the user application. For more information on building and installing hipSOLVER, see the [hipSOLVER install guide](https://hipsolver.readthedocs.io/en/latest/userguide_install.html) ## Using the hipSOLVER Interface The hipSOLVER interface is compatible with the rocSOLVER and cuSOLVER-v11 APIs. Porting a CUDA application that originally calls the cuSOLVER API to an application calling the hipSOLVER API should be fairly straightforward (see [porting a cuSOLVER application to hipSOLVER](https://hipsolver.readthedocs.io/en/latest/userguide_intro.html#porting-a-cusolver-application-to-hipsolver)). For example, the hipSOLVER SGEQRF interface is ```c hipsolverStatus_t hipsolverSgeqrf_bufferSize(hipsolverHandle_t handle, int m, int n, float* A, int lda, int* lwork); ``` ```c hipsolverStatus_t hipsolverSgeqrf(hipsolverHandle_t handle, int m, int n, float* A, int lda, float* tau, float* work, int lwork, int* devInfo); ``` ## Supported Functionality For a complete listing of all supported functions, see the [hipSOLVER user guide](https://hipsolver.readthedocs.io/en/latest/userguide_intro.html) and/or [API documentation](https://hipsolver.readthedocs.io/en/latest/api_index.html). 
hipSOLVER-rocm-5.5.1/bump_hipsolver_version.sh000077500000000000000000000015601436107207300213420ustar00rootroot00000000000000
#!/bin/bash

# run this script in develop after merging develop/staging into master at the feature-complete date
# Edit script to bump versions for new development cycle/release.

# replace_literal OLD NEW FILE
# Replaces every occurrence of the literal text OLD with NEW in FILE, in place.
# sed interprets the search text as a regular expression, so an unescaped
# version such as "1.7.0" would also match e.g. "1x7y0". Regex metacharacters
# in OLD, and the characters special to the replacement side in NEW, are
# therefore backslash-escaped before being handed to sed.
replace_literal() {
    local search_regex
    local replacement
    search_regex=$(printf '%s' "$1" | sed -e 's/[]\/$*.^[]/\\&/g')
    replacement=$(printf '%s' "$2" | sed -e 's/[\/&]/\\&/g')
    sed -i "s/${search_regex}/${replacement}/g" "$3"
}

OLD_HIPSOLVER_VERSION="1.7.0"
NEW_HIPSOLVER_VERSION="1.8.0"
replace_literal "${OLD_HIPSOLVER_VERSION}" "${NEW_HIPSOLVER_VERSION}" CMakeLists.txt

# for documentation
OLD_HIPSOLVER_DOCS_VERSION="1.7"
NEW_HIPSOLVER_DOCS_VERSION="1.8"
replace_literal "${OLD_HIPSOLVER_DOCS_VERSION}" "${NEW_HIPSOLVER_DOCS_VERSION}" docs/source/conf.py

# for rocBLAS/rocSOLVER package requirements
OLD_MINIMUM_ROCBLAS_VERSION="2.47.0"
NEW_MINIMUM_ROCBLAS_VERSION="2.48.0"
OLD_MINIMUM_ROCSOLVER_VERSION="3.21.0"
NEW_MINIMUM_ROCSOLVER_VERSION="3.22.0"
replace_literal "${OLD_MINIMUM_ROCBLAS_VERSION}" "${NEW_MINIMUM_ROCBLAS_VERSION}" CMakeLists.txt
replace_literal "${OLD_MINIMUM_ROCSOLVER_VERSION}" "${NEW_MINIMUM_ROCSOLVER_VERSION}" CMakeLists.txt
hipSOLVER-rocm-5.5.1/clients/000077500000000000000000000000001436107207300156375ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/CMakeLists.txt000066400000000000000000000060561436107207300204060ustar00rootroot00000000000000
# ########################################################################
# Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
# ies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
# PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# ########################################################################

# Consider removing this in the future
# This should appear before the project command, because it does not use FORCE
if( WIN32 )
  set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" )
else( )
  set( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories" )
endif( )

# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE )
  set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." )
endif()

# This project may compile dependencies for clients
project( hipsolver-clients LANGUAGES CXX )

if( UNIX )
  enable_language( Fortran )
endif( )

# We use C++14 features, this will add compile option: -std=c++14
set( CMAKE_CXX_STANDARD 14 )

list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake )

include( build-options )

# Fortran source for the client-side helper library (UNIX only).
if( UNIX )
  set(hipsolver_f90_source_clients
    include/hipsolver_fortran.f90
  )
endif( )

# The Fortran client library is built once if any client kind is enabled;
# it is ordered after the hipsolver_fortran target via add_dependencies.
if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_SAMPLES )
  if( UNIX )
    add_library(hipsolver_fortran_client STATIC ${hipsolver_f90_source_clients})
    add_dependencies(hipsolver_fortran_client hipsolver_fortran)
    include_directories(${CMAKE_BINARY_DIR}/include/hipsolver)
    include_directories(${CMAKE_BINARY_DIR}/include/hipsolver/internal)
  endif( )
endif( )

if( BUILD_CLIENTS_TESTS )
  add_subdirectory( gtest )
endif( )

if( BUILD_CLIENTS_BENCHMARKS )
  add_subdirectory( benchmarks )
endif( )

if( BUILD_CLIENTS_SAMPLES )
  add_subdirectory( samples )
endif( )
hipSOLVER-rocm-5.5.1/clients/benchmarks/000077500000000000000000000000001436107207300177545ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/benchmarks/CMakeLists.txt000066400000000000000000000070471436107207300225240ustar00rootroot00000000000000
# ########################################################################
# Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
# ies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
# PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# ########################################################################

set( THREADS_PREFER_PTHREAD_FLAG ON )
find_package( Threads REQUIRED )

# Linking lapack library requires fortran flags
find_package( cblas REQUIRED CONFIG )

if( NOT TARGET hipsolver )
  find_package( hipsolver REQUIRED CONFIG PATHS /opt/rocm/hipsolver )
endif( )

# Shared client sources compiled into the benchmark executable.
set(hipsolver_benchmark_common
  ../common/lapack_host_reference.cpp
  ../common/hipsolver_datatype2string.cpp
  ../common/utility.cpp
)

add_executable( hipsolver-bench client.cpp ${hipsolver_benchmark_common} )

# NOTE(review): several arguments below are a bare "$". They look like generator
# expressions whose "<...>" part was lost from this copy of the file; restore
# them from upstream before using this script.

# Internal header includes
target_include_directories( hipsolver-bench
  PRIVATE
    $
)

# External header includes included as system files
target_include_directories( hipsolver-bench
  SYSTEM PRIVATE
    $
    $
)

target_link_libraries( hipsolver-bench PRIVATE cblas lapack blas roc::hipsolver )
if( UNIX )
  target_link_libraries( hipsolver-bench PRIVATE hipsolver_fortran_client )
endif( )

target_link_libraries(hipsolver-bench PRIVATE $ )

add_armor_flags( hipsolver-bench "${ARMOR_LEVEL}" )

# need mf16c flag for float->half conversion
target_compile_options( hipsolver-bench PRIVATE -mf16c)

if( NOT USE_CUDA )
  target_link_libraries( hipsolver-bench PRIVATE hip::host )

  if( CUSTOM_TARGET )
    target_link_libraries( hipsolver-bench PRIVATE hip::${CUSTOM_TARGET} )
  endif( )

  if( UNIX AND CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" )
    # hip-clang needs specific flag to turn on pthread and m
    target_link_libraries( hipsolver-bench PRIVATE -lpthread -lm )
  endif()
else( )
  target_compile_definitions( hipsolver-bench PRIVATE __HIP_PLATFORM_NVCC__ )

  target_include_directories( hipsolver-bench
    PRIVATE
      $
  )

  target_link_libraries( hipsolver-bench PRIVATE ${CUDA_LIBRARIES} Threads::Threads )
endif( )

set_target_properties( hipsolver-bench PROPERTIES DEBUG_POSTFIX "-d" CXX_EXTENSIONS NO )
set_target_properties( hipsolver-bench PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
#add_dependencies( hipsolver-bench hipsolver-bench-common )

rocm_install(TARGETS hipsolver-bench COMPONENT benchmarks)

target_compile_definitions( hipsolver-bench PRIVATE HIPSOLVER_BENCH ROCM_USE_FLOAT16 )
hipSOLVER-rocm-5.5.1/clients/benchmarks/client.cpp000066400000000000000000000462141436107207300217450ustar00rootroot00000000000000
/* ************************************************************************
 * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* * * ************************************************************************ */

#include "../include/hipsolver_dispatcher.hpp"
#include "../rocblascommon/program_options.hpp"

using rocblas_int = int;
using rocblas_stride = ptrdiff_t;

using namespace roc;

// Banner streamed to std::cout, immediately before the generated option list,
// when the "help" option is present on the command line.
// clang-format off
const char* help_str = R"HELP_STR( hipSOLVER benchmark client help. Usage: ./hipsolver-bench In addition to some common general options, the following list of options corresponds to all the parameters that might be needed to test a given hipSOLVER function. The parameters are named as in the API user guide. The arrays are initialized internally by the program with random values. Note: When a required parameter/option is not provided, it will take the default value as listed below. If no default value is defined, the program will try to calculate a suitable value depending on the context of the problem and the tested function; if this is not possible, the program will abort with an error. Functions that accept multiple size parameters can generally be provided a single size parameter (typically, m) and a square-size matrix will be assumed. Example: ./hipsolver-bench -f getrf -m 30 --lda 75 This will test getrf with a random 30x30 matrix. 
Options: )HELP_STR";
// clang-format on

/*
 * Entry point of the hipSOLVER benchmark client.
 *
 * Steps, in order:
 *   1. Preset the Arguments object for benchmarking (unit_check off, timing on).
 *   2. Register every command-line option on an options_description.
 *   3. Parse argv into a variables_map; if "help" was given, print help_str
 *      followed by the option list and return 0.
 *   4. Populate the Arguments object from the parsed values.
 *   5. Select the device, run the validate_* checks on the enumerated options,
 *      and pass the function name, precision and arguments to the dispatcher.
 *
 * The whole body is a function-try-block: a std::invalid_argument thrown
 * anywhere inside is reported on std::cerr and turned into exit status -1.
 * No other exception type is caught here.
 *
 * Returns 0 on success or after printing help, -1 on std::invalid_argument.
 *
 * NOTE(review): value() is written without template arguments wherever no
 * target variable is passed. That looks like text lost when this file was
 * extracted (angle-bracket contents stripped) -- confirm against the upstream
 * source before building.
 */
int main(int argc, char* argv[])
try
{
    Arguments argus;

    // disable unit_check in client benchmark, it is only
    // used in gtest unit test
    argus.unit_check = 0;

    // enable timing check,otherwise no performance data collected
    argus.timing = 1;

    // These three are bound directly to options below; everything else is read
    // out of the variables_map by argus.populate().
    std::string function;
    char precision;
    rocblas_int device_id;

    // take arguments and set default values
    // clang-format off
    options_description desc("rocsolver client command line options");
    desc.add_options()("help,h", "Produces this help message.")

    // test options
    ("batch_count",
     value(&argus.batch_count)->default_value(1),
     "Number of matrices or problem instances in the batch.\n"
     " Only applicable to batch routines.\n"
     " ")

    ("device",
     value(&device_id)->default_value(0),
     "Set the default device to be used for subsequent program runs.\n"
     " ")

    ("function,f",
     value(&function)->default_value("getrf"),
     "The LAPACK function to test.\n"
     " Options are: getrf, getrs, potrf, potrf_batched, etc.\n"
     " ")

    ("iters,i",
     value(&argus.iters)->default_value(10),
     "Iterations to run inside the GPU timing loop.\n"
     " Reported time will be the average.\n"
     " ")

    ("mem_query",
     value(&argus.mem_query)->default_value(0),
     "Calculate the required amount of device workspace memory? 0 = No, 1 = Yes.\n"
     " This forces the client to print only the amount of device memory required by\n"
     " the function, in bytes.\n"
     " ")

    ("perf",
     value(&argus.perf)->default_value(0),
     "Ignore CPU timing results? 0 = No, 1 = Yes.\n"
     " This forces the client to print only the GPU time and the error if requested.\n"
     " ")

    ("precision,r",
     value(&precision)->default_value('s'),
     "Precision to be used in the tests.\n"
     " Options are: s, d, c, z.\n"
     " ")

    // ("singular",
    //  value(&argus.singular)->default_value(0),
    //  "Test with degenerate matrices? 0 = No, 1 = Yes\n"
    //  " This will produce matrices that are singular, non positive-definite, etc.\n"
    //  " ")

    ("verify,v",
     value(&argus.norm_check)->default_value(0),
     "Validate GPU results with CPU? 0 = No, 1 = Yes.\n"
     " This will additionally print the relative error of the computations.\n"
     " ")

    // size options
    ("k",
     value(),
     "Matrix/vector size parameter.\n"
     " Represents a sub-dimension of a problem.\n"
     " For example, the number of Householder reflections in a transformation.\n"
     " ")

    ("m",
     value(),
     "Matrix/vector size parameter.\n"
     " Typically, the number of rows of a matrix.\n"
     " ")

    ("n",
     value(),
     "Matrix/vector size parameter.\n"
     " Typically, the number of columns of a matrix,\n"
     " or the order of a system or transformation.\n"
     " ")

    ("nrhs",
     value(),
     "Matrix/vector size parameter.\n"
     " Typically, the number of columns of a matrix on the right-hand side of a problem.\n"
     " ")

    // leading dimension options
    ("lda",
     value(),
     "Matrix size parameter.\n"
     " Leading dimension of matrices A.\n"
     " ")

    ("ldb",
     value(),
     "Matrix size parameter.\n"
     " Leading dimension of matrices B.\n"
     " ")

    ("ldc",
     value(),
     "Matrix size parameter.\n"
     " Leading dimension of matrices C.\n"
     " ")

    // ("ldt",
    //  value(),
    //  "Matrix size parameter.\n"
    //  " Leading dimension of matrices T.\n"
    //  " ")

    ("ldu",
     value(),
     "Matrix size parameter.\n"
     " Leading dimension of matrices U.\n"
     " ")

    ("ldv",
     value(),
     "Matrix size parameter.\n"
     " Leading dimension of matrices V.\n"
     " ")

    // ("ldw",
    //  value(),
    //  "Matrix size parameter.\n"
    //  " Leading dimension of matrices W.\n"
    //  " ")

    ("ldx",
     value(),
     "Matrix size parameter.\n"
     " Leading dimension of matrices X.\n"
     " ")

    // ("ldy",
    //  value(),
    //  "Matrix size parameter.\n"
    //  " Leading dimension of matrices Y.\n"
    //  " ")

    // stride options
    ("strideA",
     value(),
     "Matrix/vector stride parameter.\n"
     " Stride for matrices/vectors A.\n"
     " ")

    // ("strideB",
    //  value(),
    //  "Matrix/vector stride parameter.\n"
    //  " Stride for matrices/vectors B.\n"
    //  " ")

    // ("strideD",
    //  value(),
    //  "Matrix/vector stride parameter.\n"
    //  " Stride for matrices/vectors D.\n"
    //  " ")

    // ("strideE",
    //  value(),
    //  "Matrix/vector stride parameter.\n"
    //  " Stride for matrices/vectors E.\n"
    //  " ")

    // ("strideQ",
    //  value(),
    //  "Matrix/vector stride parameter.\n"
    //  " Stride for vectors tauq.\n"
    //  " ")

    // ("strideP",
    //  value(),
    //  "Matrix/vector stride parameter.\n"
    //  " Stride for vectors tau, taup, and ipiv.\n"
    //  " ")

    ("strideS",
     value(),
     "Matrix/vector stride parameter.\n"
     " Stride for matrices/vectors S.\n"
     " ")

    ("strideU",
     value(),
     "Matrix/vector stride parameter.\n"
     " Stride for matrices/vectors U.\n"
     " ")

    ("strideV",
     value(),
     "Matrix/vector stride parameter.\n"
     " Stride for matrices/vectors V.\n"
     " ")

    // bdsqr options
    // ("nc",
    //  value()->default_value(0),
    //  "The number of columns of matrix C.\n"
    //  " Only applicable to bdsqr.\n"
    //  " ")

    // ("nu",
    //  value(),
    //  "The number of columns of matrix U.\n"
    //  " Only applicable to bdsqr.\n"
    //  " ")

    // ("nv",
    //  value()->default_value(0),
    //  "The number of columns of matrix V.\n"
    //  " Only applicable to bdsqr.\n"
    //  " ")

    // laswp options
    // ("k1",
    //  value(),
    //  "First index for row interchange.\n"
    //  " Only applicable to laswp.\n"
    //  " ")

    // ("k2",
    //  value(),
    //  "Last index for row interchange.\n"
    //  " Only applicable to laswp.\n"
    //  " ")

    // gesvd options
    ("jobu",
     value()->default_value('N'),
     "N = none, A = the entire orthogonal matrix is computed,\n"
     " S = the singular vectors are computed,\n"
     " O = the singular vectors overwrite the original matrix.\n"
     " Indicates how the left singular vectors are to be calculated and stored.\n"
     " ")

    ("jobv",
     value()->default_value('N'),
     "N = none, A = the entire orthogonal matrix is computed,\n"
     " S = the singular vectors are computed,\n"
     " O = the singular vectors overwrite the original matrix.\n"
     " Indicates how the right singular vectors are to be calculated and stored.\n"
     " ")

    // partial eigenvalue/singular value decomposition options
    ("il",
     value(),
     "Lower index in ordered subset of eigenvalues.\n"
     " Used in partial eigenvalue decomposition functions.\n"
     " ")

    ("iu",
     value(),
     "Upper index in ordered subset of eigenvalues.\n"
     " Used in partial eigenvalue decomposition functions.\n"
     " ")

    ("range",
     value()->default_value('A'),
     "A = all eigenvalues, V = in (vl, vu], I = from the il-th to the iu-th.\n"
     " For partial eigenvalue decompositions, it indicates the type of interval in which\n"
     " the eigenvalues will be found.\n"
     " ")

    ("rank",
     value(),
     "The number of singular values to be computed.\n"
     " Used in partial SVD functions.\n"
     " ")

    ("vl",
     value(),
     "Lower bound of half-open interval (vl, vu].\n"
     " Used in partial eigenvalue decomposition functions.\n"
     " Note: the used random input matrices have all eigenvalues in [-20, 20].\n"
     " ")

    ("vu",
     value(),
     "Upper bound of half-open interval (vl, vu].\n"
     " Used in partial eigenvalue decomposition functions.\n"
     " Note: the used random input matrices have all eigenvalues in [-20, 20].\n"
     " ")

    // iterative Jacobi options
    ("econ",
     value()->default_value(0),
     "Enable economy size for singular vector matrices? 0 = No, 1 = Yes.\n"
     " Only applicable to gesvdj.\n"
     " ")

    ("max_sweeps",
     value()->default_value(100),
     "Maximum number of sweeps/iterations.\n"
     " Used in iterative Jacobi functions.\n"
     " ")

    ("tolerance",
     value(),
     "Absolute tolerance at which convergence is accepted.\n"
     " Used in iterative Jacobi functions.\n"
     " ")

    ("sort_eig",
     value()->default_value(1),
     "0 = no sorting, 1 = ascending order.\n"
     " Indicates whether the computed eigenvalues are sorted in ascending order.\n"
     " Used in iterative Jacobi functions.\n"
     " ")

    // other options
    // ("direct",
    //  value()->default_value('F'),
    //  "F = forward, B = backward.\n"
    //  " The order in which a series of transformations are applied.\n"
    //  " ")

    // ("fast_alg",
    //  value()->default_value('O'),
    //  "O = out-of-place, I = in-place.\n"
    //  " Enables out-of-place computations.\n"
    //  " ")

    // ("incx",
    //  value()->default_value(1),
    //  "Increment between values in vector x.\n"
    //  " ")

    ("itype",
     value()->default_value('1'),
     "1 = Ax, 2 = ABx, 3 = BAx.\n"
     " Problem type for generalized eigenproblems.\n"
     " ")

    ("jobz",
     value()->default_value('N'),
     "N = none, V = compute eigenvectors/singular vectors of the matrix,\n"
     " Indicates how the eigenvectors/singular vectors are to be calculated and stored.\n"
     " ")

    ("side",
     value(),
     "L = left, R = right.\n"
     " The side from which a matrix should be multiplied.\n"
     " ")

    // ("storev",
    //  value(),
    //  "C = column-wise, R = row-wise.\n"
    //  " Indicates whether data is stored column-wise or row-wise.\n"
    //  " ")

    ("trans",
     value()->default_value('N'),
     "N = no transpose, T = transpose, C = conjugate transpose.\n"
     " Indicates if a matrix should be transposed.\n"
     " ")

    ("uplo",
     value()->default_value('U'),
     "U = upper, L = lower.\n"
     " Indicates where the data for a triangular or symmetric/hermitian matrix is stored.\n"
     " ");
    // clang-format on

    // Parse the command line against the registered options.
    variables_map vm;
    store(parse_command_line(argc, argv, desc), vm);
    notify(vm);

    // print help message
    if(vm.count("help"))
    {
        std::cout << help_str << desc << std::endl;
        return 0;
    }

    // Copy the parsed option values into the Arguments object.
    argus.populate(vm);

    // set device ID
    // The device-count query (and with it the upper-bound check on device_id)
    // is skipped when the "perf" option is nonzero.
    if(!argus.perf)
    {
        rocblas_int device_count = query_device_property();
        if(device_count <= device_id)
            throw std::invalid_argument("Invalid Device ID");
    }
    set_device(device_id);

    // catch invalid arguments
    argus.validate_precision("precision");
    argus.validate_operation("trans");
    argus.validate_side("side");
    argus.validate_fill("uplo");
    // argus.validate_direct("direct");
    // argus.validate_storev("storev");
    argus.validate_svect("jobu");
    argus.validate_svect("jobv");
    // argus.validate_workmode("fast_alg");
    argus.validate_itype("itype");
    argus.validate_evect("jobz");
    argus.validate_erange("range");

    // select and dispatch function test/benchmark
    hipsolver_dispatcher::invoke(function, precision, argus);

    return 0;
}
catch(const std::invalid_argument& exp)
{
    // Report the message of the invalid-argument failure and exit with -1.
    std::cerr << exp.what() << std::endl;
    return -1;
}
hipSOLVER-rocm-5.5.1/clients/cmake/000077500000000000000000000000001436107207300167175ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/cmake/build-options.cmake000066400000000000000000000040611436107207300225120ustar00rootroot00000000000000# ########################################################################
# Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
# ies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
# PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# ########################################################################

# This file is intended to be used in two ways; independently in a stand alone PROJECT
# and as part of a superbuild. If the file is included in a stand alone project, the
# variables are not expected to be preset, and this will produce options() in the GUI
# for the user to examine. If this file is included in a superbuild, the options will be
# presented in the superbuild GUI, but then passed into the ExternalProject as -D
# parameters, which would already define them.

# Each guard below calls option() only when the variable is unset or evaluates
# to false; a variable that is already true is left untouched. All three
# options default to OFF.

# Unit-test client.
if( NOT BUILD_CLIENTS_TESTS )
  option( BUILD_CLIENTS_TESTS "Build hipSOLVER unit tests" OFF )
endif( )

# Benchmark client.
if( NOT BUILD_CLIENTS_BENCHMARKS )
  option( BUILD_CLIENTS_BENCHMARKS "Build hipSOLVER benchmarks" OFF )
endif( )

# Sample programs.
if( NOT BUILD_CLIENTS_SAMPLES )
  option( BUILD_CLIENTS_SAMPLES "Build hipSOLVER samples" OFF )
endif( )
hipSOLVER-rocm-5.5.1/clients/common/000077500000000000000000000000001436107207300171275ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/common/hipsolver_datatype2string.cpp000066400000000000000000000145441436107207300250620ustar00rootroot00000000000000/* ************************************************************************
* Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
* ies of the Software, and to permit persons to whom the Software is furnished
* to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
* PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
* CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* * * ************************************************************************ */ #include "../include/hipsolver_datatype2string.hpp" /* ============================================================================================ */ /* Convert hipsolver constants to lapack char. */ char hipsolver2char_operation(hipsolverOperation_t value) { switch(value) { case HIPSOLVER_OP_N: return 'N'; case HIPSOLVER_OP_T: return 'T'; case HIPSOLVER_OP_C: return 'C'; default: throw std::invalid_argument("Invalid enum"); } } char hipsolver2char_fill(hipsolverFillMode_t value) { switch(value) { case HIPSOLVER_FILL_MODE_UPPER: return 'U'; case HIPSOLVER_FILL_MODE_LOWER: return 'L'; default: throw std::invalid_argument("Invalid enum"); } } char hipsolver2char_side(hipsolverSideMode_t value) { switch(value) { case HIPSOLVER_SIDE_LEFT: return 'L'; case HIPSOLVER_SIDE_RIGHT: return 'R'; default: throw std::invalid_argument("Invalid enum"); } } char hipsolver2char_evect(hipsolverEigMode_t value) { switch(value) { case HIPSOLVER_EIG_MODE_NOVECTOR: return 'N'; case HIPSOLVER_EIG_MODE_VECTOR: return 'V'; default: throw std::invalid_argument("Invalid enum"); } } char hipsolver2char_eform(hipsolverEigType_t value) { switch(value) { case HIPSOLVER_EIG_TYPE_1: return '1'; case HIPSOLVER_EIG_TYPE_2: return '2'; case HIPSOLVER_EIG_TYPE_3: return '3'; default: throw std::invalid_argument("Invalid enum"); } } char hipsolver2char_erange(hipsolverEigRange_t value) { switch(value) { case HIPSOLVER_EIG_RANGE_ALL: return 'A'; case HIPSOLVER_EIG_RANGE_V: return 'V'; case HIPSOLVER_EIG_RANGE_I: return 'I'; default: throw std::invalid_argument("Invalid enum"); } } /* ============================================================================================ */ /* Convert lapack char constants to hipsolver type. */ hipsolverStatus_t string2hipsolver_status(const std::string& value) { return value == "HIPSOLVER_STATUS_SUCCESS" ? HIPSOLVER_STATUS_SUCCESS : value == "HIPSOLVER_STATUS_NOT_INITIALIZED" ? 
HIPSOLVER_STATUS_NOT_INITIALIZED : value == "HIPSOLVER_STATUS_ALLOC_FAILED" ? HIPSOLVER_STATUS_ALLOC_FAILED : value == "HIPSOLVER_STATUS_INVALID_VALUE" ? HIPSOLVER_STATUS_INVALID_VALUE : value == "HIPSOLVER_STATUS_MAPPING_ERROR" ? HIPSOLVER_STATUS_MAPPING_ERROR : value == "HIPSOLVER_STATUS_EXECUTION_FAILED" ? HIPSOLVER_STATUS_EXECUTION_FAILED : value == "HIPSOLVER_STATUS_INTERNAL_ERROR" ? HIPSOLVER_STATUS_INTERNAL_ERROR : value == "HIPSOLVER_STATUS_NOT_SUPPORTED" ? HIPSOLVER_STATUS_NOT_SUPPORTED : value == "HIPSOLVER_STATUS_ARCH_MISMATCH" ? HIPSOLVER_STATUS_ARCH_MISMATCH : value == "HIPSOLVER_STATUS_HANDLE_IS_NULLPTR" ? HIPSOLVER_STATUS_HANDLE_IS_NULLPTR : value == "HIPSOLVER_STATUS_INVALID_ENUM" ? HIPSOLVER_STATUS_INVALID_ENUM : value == "HIPSOLVER_STATUS_UNKNOWN" ? HIPSOLVER_STATUS_UNKNOWN : static_cast(-1); } hipsolverOperation_t char2hipsolver_operation(char value) { switch(value) { case 'n': case 'N': return HIPSOLVER_OP_N; case 't': case 'T': return HIPSOLVER_OP_T; case 'c': case 'C': return HIPSOLVER_OP_C; default: throw std::invalid_argument("Invalid character"); } } hipsolverFillMode_t char2hipsolver_fill(char value) { switch(value) { case 'u': case 'U': return HIPSOLVER_FILL_MODE_UPPER; case 'l': case 'L': return HIPSOLVER_FILL_MODE_LOWER; default: throw std::invalid_argument("Invalid character"); } } hipsolverSideMode_t char2hipsolver_side(char value) { switch(value) { case 'l': case 'L': return HIPSOLVER_SIDE_LEFT; case 'r': case 'R': return HIPSOLVER_SIDE_RIGHT; default: throw std::invalid_argument("Invalid character"); } } hipsolverEigMode_t char2hipsolver_evect(char value) { switch(value) { case 'n': case 'N': return HIPSOLVER_EIG_MODE_NOVECTOR; case 'v': case 'V': return HIPSOLVER_EIG_MODE_VECTOR; default: throw std::invalid_argument("Invalid character"); } } hipsolverEigType_t char2hipsolver_eform(char value) { switch(value) { case '1': return HIPSOLVER_EIG_TYPE_1; case '2': return HIPSOLVER_EIG_TYPE_2; case '3': return HIPSOLVER_EIG_TYPE_3; 
default: throw std::invalid_argument("Invalid character"); } } hipsolverEigRange_t char2hipsolver_erange(char value) { switch(value) { case 'A': return HIPSOLVER_EIG_RANGE_ALL; case 'V': return HIPSOLVER_EIG_RANGE_V; case 'I': return HIPSOLVER_EIG_RANGE_I; default: throw std::invalid_argument("Invalid character"); } } hipSOLVER-rocm-5.5.1/clients/common/lapack_host_reference.cpp000066400000000000000000004026351436107207300241530ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "../include/lapack_host_reference.hpp" #include "cblas.h" #include "hipsolver.h" /*!\file * \brief provide template functions interfaces to BLAS and LAPACK interfaces, it is * only used for testing, not part of the GPU library */ /*************************************************************************/ // These are C wrapper calls to CBLAS and fortran LAPACK #ifdef __cplusplus extern "C" { #endif void ssymm_(char* side, char* uplo, int* m, int* n, float* alpha, float* A, int* lda, float* B, int* ldb, float* beta, float* C, int* ldc); void dsymm_(char* side, char* uplo, int* m, int* n, double* alpha, double* A, int* lda, double* B, int* ldb, double* beta, double* C, int* ldc); void chemm_(char* side, char* uplo, int* m, int* n, hipsolverComplex* alpha, hipsolverComplex* A, int* lda, hipsolverComplex* B, int* ldb, hipsolverComplex* beta, hipsolverComplex* C, int* ldc); void zhemm_(char* side, char* uplo, int* m, int* n, hipsolverDoubleComplex* alpha, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* B, int* ldb, hipsolverDoubleComplex* beta, hipsolverDoubleComplex* C, int* ldc); void ssymv_(char* uplo, int* n, float* alpha, float* A, int* lda, float* x, int* incx, float* beta, float* y, int* incy); void dsymv_(char* uplo, int* n, double* alpha, double* A, int* lda, double* x, int* incx, double* beta, double* y, int* incy); void chemv_(char* uplo, int* n, hipsolverComplex* alpha, hipsolverComplex* A, int* lda, hipsolverComplex* x, int* incx, hipsolverComplex* beta, hipsolverComplex* y, int* incy); void zhemv_(char* uplo, int* n, hipsolverDoubleComplex* alpha, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* x, int* incx, hipsolverDoubleComplex* beta, hipsolverDoubleComplex* y, int* incy); void strmm_(char* side, char* uplo, char* transA, char* diag, int* m, int* n, float* alpha, float* A, int* lda, float* B, int* ldb); void dtrmm_(char* side, char* uplo, 
char* transA, char* diag, int* m, int* n, double* alpha, double* A, int* lda, double* B, int* ldb); void ctrmm_(char* side, char* uplo, char* transA, char* diag, int* m, int* n, hipsolverComplex* alpha, hipsolverComplex* A, int* lda, hipsolverComplex* B, int* ldb); void ztrmm_(char* side, char* uplo, char* transA, char* diag, int* m, int* n, hipsolverDoubleComplex* alpha, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* B, int* ldb); void strsm_(char* side, char* uplo, char* transA, char* diag, int* m, int* n, float* alpha, float* A, int* lda, float* B, int* ldb); void dtrsm_(char* side, char* uplo, char* transA, char* diag, int* m, int* n, double* alpha, double* A, int* lda, double* B, int* ldb); void ctrsm_(char* side, char* uplo, char* transA, char* diag, int* m, int* n, hipsolverComplex* alpha, hipsolverComplex* A, int* lda, hipsolverComplex* B, int* ldb); void ztrsm_(char* side, char* uplo, char* transA, char* diag, int* m, int* n, hipsolverDoubleComplex* alpha, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* B, int* ldb); void clacgv_(int* n, hipsolverComplex* x, int* incx); void zlacgv_(int* n, hipsolverDoubleComplex* x, int* incx); void slarf_( char* side, int* m, int* n, float* x, int* incx, float* alpha, float* A, int* lda, float* work); void dlarf_(char* side, int* m, int* n, double* x, int* incx, double* alpha, double* A, int* lda, double* work); void clarf_(char* side, int* m, int* n, hipsolverComplex* x, int* incx, hipsolverComplex* alpha, hipsolverComplex* A, int* lda, hipsolverComplex* work); void zlarf_(char* side, int* m, int* n, hipsolverDoubleComplex* x, int* incx, hipsolverDoubleComplex* alpha, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* work); void sorgbr_(char* vect, int* m, int* n, int* k, float* A, int* lda, float* Ipiv, float* work, int* size_w, int* info); void dorgbr_(char* vect, int* m, int* n, int* k, double* A, int* lda, double* Ipiv, double* work, int* size_w, int* info); void cungbr_(char* 
vect, int* m, int* n, int* k, hipsolverComplex* A, int* lda, hipsolverComplex* Ipiv, hipsolverComplex* work, int* size_w, int* info); void zungbr_(char* vect, int* m, int* n, int* k, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* Ipiv, hipsolverDoubleComplex* work, int* size_w, int* info); void sorgqr_( int* m, int* n, int* k, float* A, int* lda, float* ipiv, float* work, int* lwork, int* info); void dorgqr_( int* m, int* n, int* k, double* A, int* lda, double* ipiv, double* work, int* lwork, int* info); void cungqr_(int* m, int* n, int* k, hipsolverComplex* A, int* lda, hipsolverComplex* ipiv, hipsolverComplex* work, int* lwork, int* info); void zungqr_(int* m, int* n, int* k, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* ipiv, hipsolverDoubleComplex* work, int* lwork, int* info); void sorgtr_( char* uplo, int* n, float* A, int* lda, float* Ipiv, float* work, int* size_w, int* info); void dorgtr_( char* uplo, int* n, double* A, int* lda, double* Ipiv, double* work, int* size_w, int* info); void cungtr_(char* uplo, int* n, hipsolverComplex* A, int* lda, hipsolverComplex* Ipiv, hipsolverComplex* work, int* size_w, int* info); void zungtr_(char* uplo, int* n, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* Ipiv, hipsolverDoubleComplex* work, int* size_w, int* info); void sormqr_(char* side, char* trans, int* m, int* n, int* k, float* A, int* lda, float* ipiv, float* C, int* ldc, float* work, int* sizeW, int* info); void dormqr_(char* side, char* trans, int* m, int* n, int* k, double* A, int* lda, double* ipiv, double* C, int* ldc, double* work, int* sizeW, int* info); void cunmqr_(char* side, char* trans, int* m, int* n, int* k, hipsolverComplex* A, int* lda, hipsolverComplex* ipiv, hipsolverComplex* C, int* ldc, hipsolverComplex* work, int* sizeW, int* info); void zunmqr_(char* side, char* trans, int* m, int* n, int* k, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* ipiv, hipsolverDoubleComplex* C, int* ldc, 
hipsolverDoubleComplex* work, int* sizeW, int* info); void sormtr_(char* side, char* uplo, char* trans, int* m, int* n, float* A, int* lda, float* ipiv, float* C, int* ldc, float* work, int* sizeW, int* info); void dormtr_(char* side, char* uplo, char* trans, int* m, int* n, double* A, int* lda, double* ipiv, double* C, int* ldc, double* work, int* sizeW, int* info); void cunmtr_(char* side, char* uplo, char* trans, int* m, int* n, hipsolverComplex* A, int* lda, hipsolverComplex* ipiv, hipsolverComplex* C, int* ldc, hipsolverComplex* work, int* sizeW, int* info); void zunmtr_(char* side, char* uplo, char* trans, int* m, int* n, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* ipiv, hipsolverDoubleComplex* C, int* ldc, hipsolverDoubleComplex* work, int* sizeW, int* info); void sgebrd_(int* m, int* n, float* A, int* lda, float* D, float* E, float* tauq, float* taup, float* work, int* size_w, int* info); void dgebrd_(int* m, int* n, double* A, int* lda, double* D, double* E, double* tauq, double* taup, double* work, int* size_w, int* info); void cgebrd_(int* m, int* n, hipsolverComplex* A, int* lda, float* D, float* E, hipsolverComplex* tauq, hipsolverComplex* taup, hipsolverComplex* work, int* size_w, int* info); void zgebrd_(int* m, int* n, hipsolverDoubleComplex* A, int* lda, double* D, double* E, hipsolverDoubleComplex* tauq, hipsolverDoubleComplex* taup, hipsolverDoubleComplex* work, int* size_w, int* info); void sgels_(char* trans, int* m, int* n, int* nrhs, float* A, int* lda, float* B, int* ldb, float* work, int* lwork, int* info); void dgels_(char* trans, int* m, int* n, int* nrhs, double* A, int* lda, double* B, int* ldb, double* work, int* lwork, int* info); void cgels_(char* trans, int* m, int* n, int* nrhs, hipsolverComplex* A, int* lda, hipsolverComplex* B, int* ldb, hipsolverComplex* work, int* lwork, int* info); void zgels_(char* trans, int* m, int* n, int* nrhs, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* B, int* ldb, 
hipsolverDoubleComplex* work, int* lwork, int* info); void sgeqrf_(int* m, int* n, float* A, int* lda, float* ipiv, float* work, int* lwork, int* info); void dgeqrf_( int* m, int* n, double* A, int* lda, double* ipiv, double* work, int* lwork, int* info); void cgeqrf_(int* m, int* n, hipsolverComplex* A, int* lda, hipsolverComplex* ipiv, hipsolverComplex* work, int* lwork, int* info); void zgeqrf_(int* m, int* n, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* ipiv, hipsolverDoubleComplex* work, int* lwork, int* info); void sgesv_(int* n, int* nrhs, float* A, int* lda, int* ipiv, float* B, int* ldb, int* info); void dgesv_(int* n, int* nrhs, double* A, int* lda, int* ipiv, double* B, int* ldb, int* info); void cgesv_(int* n, int* nrhs, hipsolverComplex* A, int* lda, int* ipiv, hipsolverComplex* B, int* ldb, int* info); void zgesv_(int* n, int* nrhs, hipsolverDoubleComplex* A, int* lda, int* ipiv, hipsolverDoubleComplex* B, int* ldb, int* info); void sgesvd_(char* jobu, char* jobv, int* m, int* n, float* A, int* lda, float* S, float* U, int* ldu, float* V, int* ldv, float* E, int* lwork, int* info); void dgesvd_(char* jobu, char* jobv, int* m, int* n, double* A, int* lda, double* S, double* U, int* ldu, double* V, int* ldv, double* E, int* lwork, int* info); void cgesvd_(char* jobu, char* jobv, int* m, int* n, hipsolverComplex* A, int* lda, float* S, hipsolverComplex* U, int* ldu, hipsolverComplex* V, int* ldv, hipsolverComplex* work, int* lwork, float* E, int* info); void zgesvd_(char* jobu, char* jobv, int* m, int* n, hipsolverDoubleComplex* A, int* lda, double* S, hipsolverDoubleComplex* U, int* ldu, hipsolverDoubleComplex* V, int* ldv, hipsolverDoubleComplex* work, int* lwork, double* E, int* info); void sgesvdx_(char* jobu, char* jobv, char* srange, int* m, int* n, float* A, int* lda, float* vl, float* vu, int* il, int* iu, int* nsv, float* S, float* U, int* ldu, float* V, int* ldv, float* work, int* lwork, int* iwork, int* info); void 
dgesvdx_(char* jobu, char* jobv, char* srange, int* m, int* n, double* A, int* lda, double* vl, double* vu, int* il, int* iu, int* nsv, double* S, double* U, int* ldu, double* V, int* ldv, double* work, int* lwork, int* iwork, int* info); void cgesvdx_(char* jobu, char* jobv, char* srange, int* m, int* n, hipsolverComplex* A, int* lda, float* vl, float* vu, int* il, int* iu, int* nsv, float* S, hipsolverComplex* U, int* ldu, hipsolverComplex* V, int* ldv, hipsolverComplex* work, int* lwork, float* rwork, int* iwork, int* info); void zgesvdx_(char* jobu, char* jobv, char* srange, int* m, int* n, hipsolverDoubleComplex* A, int* lda, double* vl, double* vu, int* il, int* iu, int* nsv, double* S, hipsolverDoubleComplex* U, int* ldu, hipsolverDoubleComplex* V, int* ldv, hipsolverDoubleComplex* work, int* lwork, double* rwork, int* iwork, int* info); void sgetrf_(int* m, int* n, float* A, int* lda, int* ipiv, int* info); void dgetrf_(int* m, int* n, double* A, int* lda, int* ipiv, int* info); void cgetrf_(int* m, int* n, hipsolverComplex* A, int* lda, int* ipiv, int* info); void zgetrf_(int* m, int* n, hipsolverDoubleComplex* A, int* lda, int* ipiv, int* info); void sgetrs_( char* trans, int* n, int* nrhs, float* A, int* lda, int* ipiv, float* B, int* ldb, int* info); void dgetrs_( char* trans, int* n, int* nrhs, double* A, int* lda, int* ipiv, double* B, int* ldb, int* info); void cgetrs_(char* trans, int* n, int* nrhs, hipsolverComplex* A, int* lda, int* ipiv, hipsolverComplex* B, int* ldb, int* info); void zgetrs_(char* trans, int* n, int* nrhs, hipsolverDoubleComplex* A, int* lda, int* ipiv, hipsolverDoubleComplex* B, int* ldb, int* info); void spotrf_(char* uplo, int* m, float* A, int* lda, int* info); void dpotrf_(char* uplo, int* m, double* A, int* lda, int* info); void cpotrf_(char* uplo, int* m, hipsolverComplex* A, int* lda, int* info); void zpotrf_(char* uplo, int* m, hipsolverDoubleComplex* A, int* lda, int* info); void spotri_(char* uplo, int* n, float* A, 
int* lda, int* info); void dpotri_(char* uplo, int* n, double* A, int* lda, int* info); void cpotri_(char* uplo, int* n, hipsolverComplex* A, int* lda, int* info); void zpotri_(char* uplo, int* n, hipsolverDoubleComplex* A, int* lda, int* info); void spotrs_(char* uplo, int* n, int* nrhs, float* A, int* lda, float* B, int* ldb, int* info); void dpotrs_(char* uplo, int* n, int* nrhs, double* A, int* lda, double* B, int* ldb, int* info); void cpotrs_(char* uplo, int* n, int* nrhs, hipsolverComplex* A, int* lda, hipsolverComplex* B, int* ldb, int* info); void zpotrs_(char* uplo, int* n, int* nrhs, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* B, int* ldb, int* info); void ssyevd_(char* evect, char* uplo, int* n, float* A, int* lda, float* W, float* work, int* lwork, int* iwork, int* liwork, int* info); void dsyevd_(char* evect, char* uplo, int* n, double* A, int* lda, double* W, double* work, int* lwork, int* iwork, int* liwork, int* info); void cheevd_(char* evect, char* uplo, int* n, hipsolverComplex* A, int* lda, float* W, hipsolverComplex* work, int* lwork, float* rwork, int* lrwork, int* iwork, int* liwork, int* info); void zheevd_(char* evect, char* uplo, int* n, hipsolverDoubleComplex* A, int* lda, double* W, hipsolverDoubleComplex* work, int* lwork, double* rwork, int* lrwork, int* iwork, int* liwork, int* info); void ssyevx_(char* evect, char* erange, char* uplo, int* n, float* A, int* lda, float* vl, float* vu, int* il, int* iu, float* abstol, int* nev, float* W, float* Z, int* ldz, float* work, int* lwork, int* iwork, int* ifail, int* info); void dsyevx_(char* evect, char* erange, char* uplo, int* n, double* A, int* lda, double* vl, double* vu, int* il, int* iu, double* abstol, int* nev, double* W, double* Z, int* ldz, double* work, int* lwork, int* iwork, int* ifail, int* info); void cheevx_(char* evect, char* erange, char* uplo, int* n, hipsolverComplex* A, int* lda, float* vl, float* vu, int* il, int* iu, float* abstol, int* nev, float* W, 
hipsolverComplex* Z, int* ldz, hipsolverComplex* work, int* lwork, float* rwork, int* iwork, int* ifail, int* info); void zheevx_(char* evect, char* erange, char* uplo, int* n, hipsolverDoubleComplex* A, int* lda, double* vl, double* vu, int* il, int* iu, double* abstol, int* nev, double* W, hipsolverDoubleComplex* Z, int* ldz, hipsolverDoubleComplex* work, int* lwork, double* rwork, int* iwork, int* ifail, int* info); void ssygvd_(int* itype, char* evect, char* uplo, int* n, float* A, int* lda, float* B, int* ldb, float* W, float* work, int* lwork, int* iwork, int* liwork, int* info); void dsygvd_(int* itype, char* evect, char* uplo, int* n, double* A, int* lda, double* B, int* ldb, double* W, double* work, int* lwork, int* iwork, int* liwork, int* info); void chegvd_(int* itype, char* evect, char* uplo, int* n, hipsolverComplex* A, int* lda, hipsolverComplex* B, int* ldb, float* W, hipsolverComplex* work, int* lwork, float* rwork, int* lrwork, int* iwork, int* liwork, int* info); void zhegvd_(int* itype, char* evect, char* uplo, int* n, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* B, int* ldb, double* W, hipsolverDoubleComplex* work, int* lwork, double* rwork, int* lrwork, int* iwork, int* liwork, int* info); void ssygvx_(int* itype, char* evect, char* erange, char* uplo, int* n, float* A, int* lda, float* B, int* ldb, float* vl, float* vu, int* il, int* iu, float* abstol, int* nev, float* W, float* Z, int* ldz, float* work, int* lwork, int* iwork, int* ifail, int* info); void dsygvx_(int* itype, char* evect, char* erange, char* uplo, int* n, double* A, int* lda, double* B, int* ldb, double* vl, double* vu, int* il, int* iu, double* abstol, int* nev, double* W, double* Z, int* ldz, double* work, int* lwork, int* iwork, int* ifail, int* info); void chegvx_(int* itype, char* evect, char* erange, char* uplo, int* n, hipsolverComplex* A, int* lda, hipsolverComplex* B, int* ldb, float* vl, float* vu, int* il, int* iu, float* abstol, int* nev, float* W, 
hipsolverComplex* Z, int* ldz, hipsolverComplex* work, int* lwork, float* rwork, int* iwork, int* ifail, int* info); void zhegvx_(int* itype, char* evect, char* erange, char* uplo, int* n, hipsolverDoubleComplex* A, int* lda, hipsolverDoubleComplex* B, int* ldb, double* vl, double* vu, int* il, int* iu, double* abstol, int* nev, double* W, hipsolverDoubleComplex* Z, int* ldz, hipsolverDoubleComplex* work, int* lwork, double* rwork, int* iwork, int* ifail, int* info); void ssytrd_(char* uplo, int* n, float* A, int* lda, float* D, float* E, float* tau, float* work, int* size_w, int* info); void dsytrd_(char* uplo, int* n, double* A, int* lda, double* D, double* E, double* tau, double* work, int* size_w, int* info); void chetrd_(char* uplo, int* n, hipsolverComplex* A, int* lda, float* D, float* E, hipsolverComplex* tau, hipsolverComplex* work, int* size_w, int* info); void zhetrd_(char* uplo, int* n, hipsolverDoubleComplex* A, int* lda, double* D, double* E, hipsolverDoubleComplex* tau, hipsolverDoubleComplex* work, int* size_w, int* info); void ssytrf_(char* uplo, int* n, float* A, int* lda, int* ipiv, float* work, int* lwork, int* info); void dsytrf_( char* uplo, int* n, double* A, int* lda, int* ipiv, double* work, int* lwork, int* info); void csytrf_(char* uplo, int* n, hipsolverComplex* A, int* lda, int* ipiv, hipsolverComplex* work, int* lwork, int* info); void zsytrf_(char* uplo, int* n, hipsolverDoubleComplex* A, int* lda, int* ipiv, hipsolverDoubleComplex* work, int* lwork, int* info); #ifdef __cplusplus } #endif /************************************************************************/ /************************************************************************/ // These are templated BLAS functions used in hipSOLVER clients code // gemm template <> void cblas_gemm(hipsolverOperation_t transA, hipsolverOperation_t transB, int m, int n, int k, float alpha, float* A, int lda, float* B, int ldb, float beta, float* C, int ldc) { cblas_sgemm(CblasColMajor, 
(CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_gemm(hipsolverOperation_t transA, hipsolverOperation_t transB, int m, int n, int k, double alpha, double* A, int lda, double* B, int ldb, double beta, double* C, int ldc) { cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void cblas_gemm(hipsolverOperation_t transA, hipsolverOperation_t transB, int m, int n, int k, hipsolverComplex alpha, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, hipsolverComplex beta, hipsolverComplex* C, int ldc) { cblas_cgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void cblas_gemm(hipsolverOperation_t transA, hipsolverOperation_t transB, int m, int n, int k, hipsolverDoubleComplex alpha, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, hipsolverDoubleComplex beta, hipsolverDoubleComplex* C, int ldc) { cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } // symm & hemm template <> void cblas_symm_hemm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, int m, int n, float alpha, float* A, int lda, float* B, int ldb, float beta, float* C, int ldc) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); ssymm_(&sideC, &uploC, &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } template <> void cblas_symm_hemm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, int m, int n, double alpha, double* A, int lda, double* B, int ldb, double beta, double* C, int ldc) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); dsymm_(&sideC, &uploC, &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } template <> void cblas_symm_hemm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, int m, int n, 
hipsolverComplex alpha, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, hipsolverComplex beta, hipsolverComplex* C, int ldc) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); chemm_(&sideC, &uploC, &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } template <> void cblas_symm_hemm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, int m, int n, hipsolverDoubleComplex alpha, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, hipsolverDoubleComplex beta, hipsolverDoubleComplex* C, int ldc) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); zhemm_(&sideC, &uploC, &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // symv/hemv template <> void cblas_symv_hemv(hipsolverFillMode_t uplo, int n, float alpha, float* A, int lda, float* x, int incx, float beta, float* y, int incy) { char uploC = hipsolver2char_fill(uplo); ssymv_(&uploC, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } template <> void cblas_symv_hemv(hipsolverFillMode_t uplo, int n, double alpha, double* A, int lda, double* x, int incx, double beta, double* y, int incy) { char uploC = hipsolver2char_fill(uplo); dsymv_(&uploC, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } template <> void cblas_symv_hemv(hipsolverFillMode_t uplo, int n, hipsolverComplex alpha, hipsolverComplex* A, int lda, hipsolverComplex* x, int incx, hipsolverComplex beta, hipsolverComplex* y, int incy) { char uploC = hipsolver2char_fill(uplo); chemv_(&uploC, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } template <> void cblas_symv_hemv(hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex alpha, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* x, int incx, hipsolverDoubleComplex beta, hipsolverDoubleComplex* y, int incy) { char uploC = hipsolver2char_fill(uplo); zhemv_(&uploC, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } // trmm template <> void cblas_trmm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, 
hipsolverOperation_t transA, char diag, int m, int n, float alpha, float* A, int lda, float* B, int ldb) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(transA); strmm_(&sideC, &uploC, &transC, &diag, &m, &n, &alpha, A, &lda, B, &ldb); } template <> void cblas_trmm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t transA, char diag, int m, int n, double alpha, double* A, int lda, double* B, int ldb) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(transA); dtrmm_(&sideC, &uploC, &transC, &diag, &m, &n, &alpha, A, &lda, B, &ldb); } template <> void cblas_trmm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t transA, char diag, int m, int n, hipsolverComplex alpha, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(transA); ctrmm_(&sideC, &uploC, &transC, &diag, &m, &n, &alpha, A, &lda, B, &ldb); } template <> void cblas_trmm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t transA, char diag, int m, int n, hipsolverDoubleComplex alpha, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(transA); ztrmm_(&sideC, &uploC, &transC, &diag, &m, &n, &alpha, A, &lda, B, &ldb); } // trsm template <> void cblas_trsm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t transA, char diag, int m, int n, float alpha, float* A, int lda, float* B, int ldb) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(transA); strsm_(&sideC, &uploC, &transC, &diag, &m, &n, &alpha, A, &lda, B, &ldb); } template <> void 
cblas_trsm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t transA, char diag, int m, int n, double alpha, double* A, int lda, double* B, int ldb) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(transA); dtrsm_(&sideC, &uploC, &transC, &diag, &m, &n, &alpha, A, &lda, B, &ldb); } template <> void cblas_trsm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t transA, char diag, int m, int n, hipsolverComplex alpha, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(transA); ctrsm_(&sideC, &uploC, &transC, &diag, &m, &n, &alpha, A, &lda, B, &ldb); } template <> void cblas_trsm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t transA, char diag, int m, int n, hipsolverDoubleComplex alpha, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(transA); ztrsm_(&sideC, &uploC, &transC, &diag, &m, &n, &alpha, A, &lda, B, &ldb); } /************************************************************************/ // These are templated LAPACK functions used in hipSOLVER clients code // lacgv template <> void cblas_lacgv(int n, hipsolverComplex* x, int incx) { clacgv_(&n, x, &incx); } template <> void cblas_lacgv(int n, hipsolverDoubleComplex* x, int incx) { zlacgv_(&n, x, &incx); } // larf template <> void cblas_larf(hipsolverSideMode_t sideR, int m, int n, float* x, int incx, float* alpha, float* A, int lda, float* work) { char side = hipsolver2char_side(sideR); slarf_(&side, &m, &n, x, &incx, alpha, A, &lda, work); } template <> void cblas_larf(hipsolverSideMode_t sideR, int m, int n, double* x, int incx, double* alpha, double* A, int lda, double* work) { char side = 
hipsolver2char_side(sideR); dlarf_(&side, &m, &n, x, &incx, alpha, A, &lda, work); } template <> void cblas_larf(hipsolverSideMode_t sideR, int m, int n, hipsolverComplex* x, int incx, hipsolverComplex* alpha, hipsolverComplex* A, int lda, hipsolverComplex* work) { char side = hipsolver2char_side(sideR); clarf_(&side, &m, &n, x, &incx, alpha, A, &lda, work); } template <> void cblas_larf(hipsolverSideMode_t sideR, int m, int n, hipsolverDoubleComplex* x, int incx, hipsolverDoubleComplex* alpha, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* work) { char side = hipsolver2char_side(sideR); zlarf_(&side, &m, &n, x, &incx, alpha, A, &lda, work); } // orgbr & ungbr template <> void cblas_orgbr_ungbr(hipsolverSideMode_t side, int m, int n, int k, float* A, int lda, float* Ipiv, float* work, int size_w, int* info) { char vect; if(side == HIPSOLVER_SIDE_LEFT) vect = 'Q'; else vect = 'P'; sorgbr_(&vect, &m, &n, &k, A, &lda, Ipiv, work, &size_w, info); } template <> void cblas_orgbr_ungbr(hipsolverSideMode_t side, int m, int n, int k, double* A, int lda, double* Ipiv, double* work, int size_w, int* info) { char vect; if(side == HIPSOLVER_SIDE_LEFT) vect = 'Q'; else vect = 'P'; dorgbr_(&vect, &m, &n, &k, A, &lda, Ipiv, work, &size_w, info); } template <> void cblas_orgbr_ungbr(hipsolverSideMode_t side, int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* Ipiv, hipsolverComplex* work, int size_w, int* info) { char vect; if(side == HIPSOLVER_SIDE_LEFT) vect = 'Q'; else vect = 'P'; cungbr_(&vect, &m, &n, &k, A, &lda, Ipiv, work, &size_w, info); } template <> void cblas_orgbr_ungbr(hipsolverSideMode_t side, int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* Ipiv, hipsolverDoubleComplex* work, int size_w, int* info) { char vect; if(side == HIPSOLVER_SIDE_LEFT) vect = 'Q'; else vect = 'P'; zungbr_(&vect, &m, &n, &k, A, &lda, Ipiv, work, &size_w, info); } // orgqr & ungqr template <> void cblas_orgqr_ungqr( int m, int n, int 
k, float* A, int lda, float* ipiv, float* work, int lwork, int* info) { sorgqr_(&m, &n, &k, A, &lda, ipiv, work, &lwork, info); } template <> void cblas_orgqr_ungqr( int m, int n, int k, double* A, int lda, double* ipiv, double* work, int lwork, int* info) { dorgqr_(&m, &n, &k, A, &lda, ipiv, work, &lwork, info); } template <> void cblas_orgqr_ungqr(int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* ipiv, hipsolverComplex* work, int lwork, int* info) { cungqr_(&m, &n, &k, A, &lda, ipiv, work, &lwork, info); } template <> void cblas_orgqr_ungqr(int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* ipiv, hipsolverDoubleComplex* work, int lwork, int* info) { zungqr_(&m, &n, &k, A, &lda, ipiv, work, &lwork, info); } // orgtr & ungtr template <> void cblas_orgtr_ungtr(hipsolverFillMode_t uplo, int n, float* A, int lda, float* Ipiv, float* work, int size_w, int* info) { char uploC = hipsolver2char_fill(uplo); sorgtr_(&uploC, &n, A, &lda, Ipiv, work, &size_w, info); } template <> void cblas_orgtr_ungtr(hipsolverFillMode_t uplo, int n, double* A, int lda, double* Ipiv, double* work, int size_w, int* info) { char uploC = hipsolver2char_fill(uplo); dorgtr_(&uploC, &n, A, &lda, Ipiv, work, &size_w, info); } template <> void cblas_orgtr_ungtr(hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, hipsolverComplex* Ipiv, hipsolverComplex* work, int size_w, int* info) { char uploC = hipsolver2char_fill(uplo); cungtr_(&uploC, &n, A, &lda, Ipiv, work, &size_w, info); } template <> void cblas_orgtr_ungtr(hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* Ipiv, hipsolverDoubleComplex* work, int size_w, int* info) { char uploC = hipsolver2char_fill(uplo); zungtr_(&uploC, &n, A, &lda, Ipiv, work, &size_w, info); } // ormqr & unmqr template <> void cblas_ormqr_unmqr(hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, float* A, int lda, float* ipiv, float* C, int ldc, 
float* work, int lwork, int* info) { char sideC = hipsolver2char_side(side); char transC = hipsolver2char_operation(trans); sormqr_(&sideC, &transC, &m, &n, &k, A, &lda, ipiv, C, &ldc, work, &lwork, info); } template <> void cblas_ormqr_unmqr(hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, double* A, int lda, double* ipiv, double* C, int ldc, double* work, int lwork, int* info) { char sideC = hipsolver2char_side(side); char transC = hipsolver2char_operation(trans); dormqr_(&sideC, &transC, &m, &n, &k, A, &lda, ipiv, C, &ldc, work, &lwork, info); } template <> void cblas_ormqr_unmqr(hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* ipiv, hipsolverComplex* C, int ldc, hipsolverComplex* work, int lwork, int* info) { char sideC = hipsolver2char_side(side); char transC = hipsolver2char_operation(trans); cunmqr_(&sideC, &transC, &m, &n, &k, A, &lda, ipiv, C, &ldc, work, &lwork, info); } template <> void cblas_ormqr_unmqr(hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* ipiv, hipsolverDoubleComplex* C, int ldc, hipsolverDoubleComplex* work, int lwork, int* info) { char sideC = hipsolver2char_side(side); char transC = hipsolver2char_operation(trans); zunmqr_(&sideC, &transC, &m, &n, &k, A, &lda, ipiv, C, &ldc, work, &lwork, info); } // ormtr & unmtr template <> void cblas_ormtr_unmtr(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, float* A, int lda, float* ipiv, float* C, int ldc, float* work, int lwork, int* info) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(trans); sormtr_(&sideC, &uploC, &transC, &m, &n, A, &lda, ipiv, C, &ldc, work, &lwork, info); } template <> void cblas_ormtr_unmtr(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int 
m, int n, double* A, int lda, double* ipiv, double* C, int ldc, double* work, int lwork, int* info) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(trans); dormtr_(&sideC, &uploC, &transC, &m, &n, A, &lda, ipiv, C, &ldc, work, &lwork, info); } template <> void cblas_ormtr_unmtr(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, hipsolverComplex* A, int lda, hipsolverComplex* ipiv, hipsolverComplex* C, int ldc, hipsolverComplex* work, int lwork, int* info) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(trans); cunmtr_(&sideC, &uploC, &transC, &m, &n, A, &lda, ipiv, C, &ldc, work, &lwork, info); } template <> void cblas_ormtr_unmtr(hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* ipiv, hipsolverDoubleComplex* C, int ldc, hipsolverDoubleComplex* work, int lwork, int* info) { char sideC = hipsolver2char_side(side); char uploC = hipsolver2char_fill(uplo); char transC = hipsolver2char_operation(trans); zunmtr_(&sideC, &uploC, &transC, &m, &n, A, &lda, ipiv, C, &ldc, work, &lwork, info); } // gebrd template <> void cblas_gebrd(int m, int n, float* A, int lda, float* D, float* E, float* tauq, float* taup, float* work, int size_w, int* info) { sgebrd_(&m, &n, A, &lda, D, E, tauq, taup, work, &size_w, info); } template <> void cblas_gebrd(int m, int n, double* A, int lda, double* D, double* E, double* tauq, double* taup, double* work, int size_w, int* info) { dgebrd_(&m, &n, A, &lda, D, E, tauq, taup, work, &size_w, info); } template <> void cblas_gebrd(int m, int n, hipsolverComplex* A, int lda, float* D, float* E, hipsolverComplex* tauq, hipsolverComplex* taup, hipsolverComplex* work, int size_w, int* info) { cgebrd_(&m, &n, A, &lda, D, E, tauq, taup, work, &size_w, info); } template 
<> void cblas_gebrd(int m, int n, hipsolverDoubleComplex* A, int lda, double* D, double* E, hipsolverDoubleComplex* tauq, hipsolverDoubleComplex* taup, hipsolverDoubleComplex* work, int size_w, int* info) { zgebrd_(&m, &n, A, &lda, D, E, tauq, taup, work, &size_w, info); } // gels template <> void cblas_gels(hipsolverOperation_t transR, int m, int n, int nrhs, float* A, int lda, float* B, int ldb, float* work, int lwork, int* info) { char trans = hipsolver2char_operation(transR); sgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, info); } template <> void cblas_gels(hipsolverOperation_t transR, int m, int n, int nrhs, double* A, int lda, double* B, int ldb, double* work, int lwork, int* info) { char trans = hipsolver2char_operation(transR); dgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, info); } template <> void cblas_gels(hipsolverOperation_t transR, int m, int n, int nrhs, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, hipsolverComplex* work, int lwork, int* info) { char trans = hipsolver2char_operation(transR); cgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, info); } template <> void cblas_gels(hipsolverOperation_t transR, int m, int n, int nrhs, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, hipsolverDoubleComplex* work, int lwork, int* info) { char trans = hipsolver2char_operation(transR); zgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, info); } // geqrf template <> void cblas_geqrf( int m, int n, float* A, int lda, float* ipiv, float* work, int lwork, int* info) { sgeqrf_(&m, &n, A, &lda, ipiv, work, &lwork, info); } template <> void cblas_geqrf( int m, int n, double* A, int lda, double* ipiv, double* work, int lwork, int* info) { dgeqrf_(&m, &n, A, &lda, ipiv, work, &lwork, info); } template <> void cblas_geqrf(int m, int n, hipsolverComplex* A, int lda, hipsolverComplex* ipiv, hipsolverComplex* work, int lwork, int* info) { cgeqrf_(&m, &n, A, &lda, ipiv, work, 
&lwork, info); } template <> void cblas_geqrf(int m, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* ipiv, hipsolverDoubleComplex* work, int lwork, int* info) { zgeqrf_(&m, &n, A, &lda, ipiv, work, &lwork, info); } // gesv template <> void cblas_gesv(int n, int nrhs, float* A, int lda, int* ipiv, float* B, int ldb, int* info) { sgesv_(&n, &nrhs, A, &lda, ipiv, B, &ldb, info); } template <> void cblas_gesv( int n, int nrhs, double* A, int lda, int* ipiv, double* B, int ldb, int* info) { dgesv_(&n, &nrhs, A, &lda, ipiv, B, &ldb, info); } template <> void cblas_gesv(int n, int nrhs, hipsolverComplex* A, int lda, int* ipiv, hipsolverComplex* B, int ldb, int* info) { cgesv_(&n, &nrhs, A, &lda, ipiv, B, &ldb, info); } template <> void cblas_gesv(int n, int nrhs, hipsolverDoubleComplex* A, int lda, int* ipiv, hipsolverDoubleComplex* B, int ldb, int* info) { zgesv_(&n, &nrhs, A, &lda, ipiv, B, &ldb, info); } // gesvd template <> void cblas_gesvd(char jobu, char jobv, int m, int n, float* A, int lda, float* S, float* U, int ldu, float* V, int ldv, float* work, int lwork, float* E, int* info) { sgesvd_(&jobu, &jobv, &m, &n, A, &lda, S, U, &ldu, V, &ldv, E, &lwork, info); } template <> void cblas_gesvd(char jobu, char jobv, int m, int n, double* A, int lda, double* S, double* U, int ldu, double* V, int ldv, double* work, int lwork, double* E, int* info) { dgesvd_(&jobu, &jobv, &m, &n, A, &lda, S, U, &ldu, V, &ldv, E, &lwork, info); } template <> void cblas_gesvd(char jobu, char jobv, int m, int n, hipsolverComplex* A, int lda, float* S, hipsolverComplex* U, int ldu, hipsolverComplex* V, int ldv, hipsolverComplex* work, int lwork, float* E, int* info) { cgesvd_(&jobu, &jobv, &m, &n, A, &lda, S, U, &ldu, V, &ldv, work, &lwork, E, info); } template <> void cblas_gesvd(char jobu, char jobv, int m, int n, hipsolverDoubleComplex* A, int lda, double* S, hipsolverDoubleComplex* U, int ldu, hipsolverDoubleComplex* V, int ldv, hipsolverDoubleComplex* work, int lwork, 
double* E, int* info) { zgesvd_(&jobu, &jobv, &m, &n, A, &lda, S, U, &ldu, V, &ldv, work, &lwork, E, info); } // gesvdx template <> void cblas_gesvdx(hipsolverEigMode_t leftv, hipsolverEigMode_t rightv, char srange, int m, int n, float* A, int lda, float vl, float vu, int il, int iu, int* nsv, float* S, float* U, int ldu, float* V, int ldv, float* work, int lwork, float* rwork, int* iwork, int* info) { char jobu = hipsolver2char_evect(leftv); char jobv = hipsolver2char_evect(rightv); sgesvdx_(&jobu, &jobv, &srange, &m, &n, A, &lda, &vl, &vu, &il, &iu, nsv, S, U, &ldu, V, &ldv, work, &lwork, iwork, info); } template <> void cblas_gesvdx(hipsolverEigMode_t leftv, hipsolverEigMode_t rightv, char srange, int m, int n, double* A, int lda, double vl, double vu, int il, int iu, int* nsv, double* S, double* U, int ldu, double* V, int ldv, double* work, int lwork, double* rwork, int* iwork, int* info) { char jobu = hipsolver2char_evect(leftv); char jobv = hipsolver2char_evect(rightv); dgesvdx_(&jobu, &jobv, &srange, &m, &n, A, &lda, &vl, &vu, &il, &iu, nsv, S, U, &ldu, V, &ldv, work, &lwork, iwork, info); } template <> void cblas_gesvdx(hipsolverEigMode_t leftv, hipsolverEigMode_t rightv, char srange, int m, int n, hipsolverComplex* A, int lda, float vl, float vu, int il, int iu, int* nsv, float* S, hipsolverComplex* U, int ldu, hipsolverComplex* V, int ldv, hipsolverComplex* work, int lwork, float* rwork, int* iwork, int* info) { char jobu = hipsolver2char_evect(leftv); char jobv = hipsolver2char_evect(rightv); cgesvdx_(&jobu, &jobv, &srange, &m, &n, A, &lda, &vl, &vu, &il, &iu, nsv, S, U, &ldu, V, &ldv, work, &lwork, rwork, iwork, info); } template <> void cblas_gesvdx(hipsolverEigMode_t leftv, hipsolverEigMode_t rightv, char srange, int m, int n, hipsolverDoubleComplex* A, int lda, double vl, double vu, int il, int iu, int* nsv, double* S, hipsolverDoubleComplex* U, int ldu, hipsolverDoubleComplex* V, int ldv, hipsolverDoubleComplex* work, int lwork, double* rwork, int* 
iwork, int* info) { char jobu = hipsolver2char_evect(leftv); char jobv = hipsolver2char_evect(rightv); zgesvdx_(&jobu, &jobv, &srange, &m, &n, A, &lda, &vl, &vu, &il, &iu, nsv, S, U, &ldu, V, &ldv, work, &lwork, rwork, iwork, info); } // getrf template <> void cblas_getrf(int m, int n, float* A, int lda, int* ipiv, int* info) { sgetrf_(&m, &n, A, &lda, ipiv, info); } template <> void cblas_getrf(int m, int n, double* A, int lda, int* ipiv, int* info) { dgetrf_(&m, &n, A, &lda, ipiv, info); } template <> void cblas_getrf(int m, int n, hipsolverComplex* A, int lda, int* ipiv, int* info) { cgetrf_(&m, &n, A, &lda, ipiv, info); } template <> void cblas_getrf( int m, int n, hipsolverDoubleComplex* A, int lda, int* ipiv, int* info) { zgetrf_(&m, &n, A, &lda, ipiv, info); } // getrs template <> void cblas_getrs(hipsolverOperation_t trans, int n, int nrhs, float* A, int lda, int* ipiv, float* B, int ldb, int* info) { char transC = hipsolver2char_operation(trans); sgetrs_(&transC, &n, &nrhs, A, &lda, ipiv, B, &ldb, info); } template <> void cblas_getrs(hipsolverOperation_t trans, int n, int nrhs, double* A, int lda, int* ipiv, double* B, int ldb, int* info) { char transC = hipsolver2char_operation(trans); dgetrs_(&transC, &n, &nrhs, A, &lda, ipiv, B, &ldb, info); } template <> void cblas_getrs(hipsolverOperation_t trans, int n, int nrhs, hipsolverComplex* A, int lda, int* ipiv, hipsolverComplex* B, int ldb, int* info) { char transC = hipsolver2char_operation(trans); cgetrs_(&transC, &n, &nrhs, A, &lda, ipiv, B, &ldb, info); } template <> void cblas_getrs(hipsolverOperation_t trans, int n, int nrhs, hipsolverDoubleComplex* A, int lda, int* ipiv, hipsolverDoubleComplex* B, int ldb, int* info) { char transC = hipsolver2char_operation(trans); zgetrs_(&transC, &n, &nrhs, A, &lda, ipiv, B, &ldb, info); } // potrf template <> void cblas_potrf(hipsolverFillMode_t uplo, int n, float* A, int lda, int* info) { char uploC = hipsolver2char_fill(uplo); spotrf_(&uploC, &n, A, &lda, info); 
} template <> void cblas_potrf(hipsolverFillMode_t uplo, int n, double* A, int lda, int* info) { char uploC = hipsolver2char_fill(uplo); dpotrf_(&uploC, &n, A, &lda, info); } template <> void cblas_potrf( hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int* info) { char uploC = hipsolver2char_fill(uplo); cpotrf_(&uploC, &n, A, &lda, info); } template <> void cblas_potrf( hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int* info) { char uploC = hipsolver2char_fill(uplo); zpotrf_(&uploC, &n, A, &lda, info); } // potri template <> void cblas_potri(hipsolverFillMode_t uplo, int n, float* A, int lda, int* info) { char uploC = hipsolver2char_fill(uplo); spotri_(&uploC, &n, A, &lda, info); } template <> void cblas_potri(hipsolverFillMode_t uplo, int n, double* A, int lda, int* info) { char uploC = hipsolver2char_fill(uplo); dpotri_(&uploC, &n, A, &lda, info); } template <> void cblas_potri(hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int* info) { char uploC = hipsolver2char_fill(uplo); cpotri_(&uploC, &n, A, &lda, info); } template <> void cblas_potri(hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int* info) { char uploC = hipsolver2char_fill(uplo); zpotri_(&uploC, &n, A, &lda, info); } // potrs template <> void cblas_potrs( hipsolverFillMode_t uplo, int n, int nrhs, float* A, int lda, float* B, int ldb, int* info) { char uploC = hipsolver2char_fill(uplo); spotrs_(&uploC, &n, &nrhs, A, &lda, B, &ldb, info); } template <> void cblas_potrs( hipsolverFillMode_t uplo, int n, int nrhs, double* A, int lda, double* B, int ldb, int* info) { char uploC = hipsolver2char_fill(uplo); dpotrs_(&uploC, &n, &nrhs, A, &lda, B, &ldb, info); } template <> void cblas_potrs(hipsolverFillMode_t uplo, int n, int nrhs, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, int* info) { char uploC = hipsolver2char_fill(uplo); cpotrs_(&uploC, &n, &nrhs, A, &lda, B, &ldb, info); } template <> void 
cblas_potrs(hipsolverFillMode_t uplo, int n, int nrhs, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, int* info) { char uploC = hipsolver2char_fill(uplo); zpotrs_(&uploC, &n, &nrhs, A, &lda, B, &ldb, info); } // syevd & heevd template <> void cblas_syevd_heevd(hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, float* work, int lwork, float* rwork, int lrwork, int* iwork, int liwork, int* info) { char evectC = hipsolver2char_evect(evect); char uploC = hipsolver2char_fill(uplo); ssyevd_(&evectC, &uploC, &n, A, &lda, W, rwork, &lrwork, iwork, &liwork, info); } template <> void cblas_syevd_heevd(hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, double* work, int lwork, double* rwork, int lrwork, int* iwork, int liwork, int* info) { char evectC = hipsolver2char_evect(evect); char uploC = hipsolver2char_fill(uplo); dsyevd_(&evectC, &uploC, &n, A, &lda, W, rwork, &lrwork, iwork, &liwork, info); } template <> void cblas_syevd_heevd(hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, float* W, hipsolverComplex* work, int lwork, float* rwork, int lrwork, int* iwork, int liwork, int* info) { char evectC = hipsolver2char_evect(evect); char uploC = hipsolver2char_fill(uplo); cheevd_(&evectC, &uploC, &n, A, &lda, W, work, &lwork, rwork, &lrwork, iwork, &liwork, info); } template <> void cblas_syevd_heevd(hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, double* W, hipsolverDoubleComplex* work, int lwork, double* rwork, int lrwork, int* iwork, int liwork, int* info) { char evectC = hipsolver2char_evect(evect); char uploC = hipsolver2char_fill(uplo); zheevd_(&evectC, &uploC, &n, A, &lda, W, work, &lwork, rwork, &lrwork, iwork, &liwork, info); } // syevx & heevx template <> void cblas_syevx_heevx(hipsolverEigMode_t evect, hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n, float* A, int lda, 
float vl, float vu, int il, int iu, float abstol, int* nev, float* W, float* Z, int ldz, float* work, int lwork, float* rwork, int* iwork, int* ifail, int* info) { char evectC = hipsolver2char_evect(evect); char erangeC = hipsolver2char_erange(erange); char uploC = hipsolver2char_fill(uplo); ssyevx_(&evectC, &erangeC, &uploC, &n, A, &lda, &vl, &vu, &il, &iu, &abstol, nev, W, Z, &ldz, work, &lwork, iwork, ifail, info); } template <> void cblas_syevx_heevx(hipsolverEigMode_t evect, hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n, double* A, int lda, double vl, double vu, int il, int iu, double abstol, int* nev, double* W, double* Z, int ldz, double* work, int lwork, double* rwork, int* iwork, int* ifail, int* info) { char evectC = hipsolver2char_evect(evect); char erangeC = hipsolver2char_erange(erange); char uploC = hipsolver2char_fill(uplo); dsyevx_(&evectC, &erangeC, &uploC, &n, A, &lda, &vl, &vu, &il, &iu, &abstol, nev, W, Z, &ldz, work, &lwork, iwork, ifail, info); } template <> void cblas_syevx_heevx(hipsolverEigMode_t evect, hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, float vl, float vu, int il, int iu, float abstol, int* nev, float* W, hipsolverComplex* Z, int ldz, hipsolverComplex* work, int lwork, float* rwork, int* iwork, int* ifail, int* info) { char evectC = hipsolver2char_evect(evect); char erangeC = hipsolver2char_erange(erange); char uploC = hipsolver2char_fill(uplo); cheevx_(&evectC, &erangeC, &uploC, &n, A, &lda, &vl, &vu, &il, &iu, &abstol, nev, W, Z, &ldz, work, &lwork, rwork, iwork, ifail, info); } template <> void cblas_syevx_heevx(hipsolverEigMode_t evect, hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, double vl, double vu, int il, int iu, double abstol, int* nev, double* W, hipsolverDoubleComplex* Z, int ldz, hipsolverDoubleComplex* work, int lwork, double* rwork, int* iwork, int* ifail, int* info) { char evectC = 
hipsolver2char_evect(evect); char erangeC = hipsolver2char_erange(erange); char uploC = hipsolver2char_fill(uplo); zheevx_(&evectC, &erangeC, &uploC, &n, A, &lda, &vl, &vu, &il, &iu, &abstol, nev, W, Z, &ldz, work, &lwork, rwork, iwork, ifail, info); } // sygvd & hegvd template <> void cblas_sygvd_hegvd(hipsolverEigType_t itype, hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float* W, float* work, int lwork, float* rwork, int lrwork, int* iwork, int liwork, int* info) { int itypeI = hipsolver2char_eform(itype) - '0'; char evectC = hipsolver2char_evect(evect); char uploC = hipsolver2char_fill(uplo); ssygvd_( &itypeI, &evectC, &uploC, &n, A, &lda, B, &ldb, W, rwork, &lrwork, iwork, &liwork, info); } template <> void cblas_sygvd_hegvd(hipsolverEigType_t itype, hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, double* W, double* work, int lwork, double* rwork, int lrwork, int* iwork, int liwork, int* info) { int itypeI = hipsolver2char_eform(itype) - '0'; char evectC = hipsolver2char_evect(evect); char uploC = hipsolver2char_fill(uplo); dsygvd_( &itypeI, &evectC, &uploC, &n, A, &lda, B, &ldb, W, rwork, &lrwork, iwork, &liwork, info); } template <> void cblas_sygvd_hegvd(hipsolverEigType_t itype, hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, float* W, hipsolverComplex* work, int lwork, float* rwork, int lrwork, int* iwork, int liwork, int* info) { int itypeI = hipsolver2char_eform(itype) - '0'; char evectC = hipsolver2char_evect(evect); char uploC = hipsolver2char_fill(uplo); chegvd_(&itypeI, &evectC, &uploC, &n, A, &lda, B, &ldb, W, work, &lwork, rwork, &lrwork, iwork, &liwork, info); } template <> void cblas_sygvd_hegvd(hipsolverEigType_t itype, hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, double* W, 
hipsolverDoubleComplex* work, int lwork, double* rwork, int lrwork, int* iwork, int liwork, int* info) { int itypeI = hipsolver2char_eform(itype) - '0'; char evectC = hipsolver2char_evect(evect); char uploC = hipsolver2char_fill(uplo); zhegvd_(&itypeI, &evectC, &uploC, &n, A, &lda, B, &ldb, W, work, &lwork, rwork, &lrwork, iwork, &liwork, info); } // sygvx & hegvx template <> void cblas_sygvx_hegvx(hipsolverEigType_t itype, hipsolverEigMode_t evect, hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float vl, float vu, int il, int iu, float abstol, int* nev, float* W, float* Z, int ldz, float* work, int lwork, float* rwork, int* iwork, int* ifail, int* info) { int itypeI = hipsolver2char_eform(itype) - '0'; char evectC = hipsolver2char_evect(evect); char erangeC = hipsolver2char_erange(erange); char uploC = hipsolver2char_fill(uplo); ssygvx_(&itypeI, &evectC, &erangeC, &uploC, &n, A, &lda, B, &ldb, &vl, &vu, &il, &iu, &abstol, nev, W, Z, &ldz, work, &lwork, iwork, ifail, info); } template <> void cblas_sygvx_hegvx(hipsolverEigType_t itype, hipsolverEigMode_t evect, hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, double vl, double vu, int il, int iu, double abstol, int* nev, double* W, double* Z, int ldz, double* work, int lwork, double* rwork, int* iwork, int* ifail, int* info) { int itypeI = hipsolver2char_eform(itype) - '0'; char evectC = hipsolver2char_evect(evect); char erangeC = hipsolver2char_erange(erange); char uploC = hipsolver2char_fill(uplo); dsygvx_(&itypeI, &evectC, &erangeC, &uploC, &n, A, &lda, B, &ldb, &vl, &vu, &il, &iu, &abstol, nev, W, Z, &ldz, work, &lwork, iwork, ifail, info); } template <> void cblas_sygvx_hegvx(hipsolverEigType_t itype, hipsolverEigMode_t evect, hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, float vl, float vu, int il, int iu, float abstol, int* nev, 
float* W, hipsolverComplex* Z, int ldz, hipsolverComplex* work, int lwork, float* rwork, int* iwork, int* ifail, int* info) { int itypeI = hipsolver2char_eform(itype) - '0'; char evectC = hipsolver2char_evect(evect); char erangeC = hipsolver2char_erange(erange); char uploC = hipsolver2char_fill(uplo); chegvx_(&itypeI, &evectC, &erangeC, &uploC, &n, A, &lda, B, &ldb, &vl, &vu, &il, &iu, &abstol, nev, W, Z, &ldz, work, &lwork, rwork, iwork, ifail, info); } template <> void cblas_sygvx_hegvx(hipsolverEigType_t itype, hipsolverEigMode_t evect, hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, double vl, double vu, int il, int iu, double abstol, int* nev, double* W, hipsolverDoubleComplex* Z, int ldz, hipsolverDoubleComplex* work, int lwork, double* rwork, int* iwork, int* ifail, int* info) { int itypeI = hipsolver2char_eform(itype) - '0'; char evectC = hipsolver2char_evect(evect); char erangeC = hipsolver2char_erange(erange); char uploC = hipsolver2char_fill(uplo); zhegvx_(&itypeI, &evectC, &erangeC, &uploC, &n, A, &lda, B, &ldb, &vl, &vu, &il, &iu, &abstol, nev, W, Z, &ldz, work, &lwork, rwork, iwork, ifail, info); } // sytrd & hetrd template <> void cblas_sytrd_hetrd(hipsolverFillMode_t uplo, int n, float* A, int lda, float* D, float* E, float* tau, float* work, int size_w) { int info; char uploC = hipsolver2char_fill(uplo); ssytrd_(&uploC, &n, A, &lda, D, E, tau, work, &size_w, &info); } template <> void cblas_sytrd_hetrd(hipsolverFillMode_t uplo, int n, double* A, int lda, double* D, double* E, double* tau, double* work, int size_w) { int info; char uploC = hipsolver2char_fill(uplo); dsytrd_(&uploC, &n, A, &lda, D, E, tau, work, &size_w, &info); } template <> void cblas_sytrd_hetrd(hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, float* D, float* E, hipsolverComplex* tau, hipsolverComplex* work, int size_w) { int info; char uploC = hipsolver2char_fill(uplo); 
chetrd_(&uploC, &n, A, &lda, D, E, tau, work, &size_w, &info); } template <> void cblas_sytrd_hetrd(hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, double* D, double* E, hipsolverDoubleComplex* tau, hipsolverDoubleComplex* work, int size_w) { int info; char uploC = hipsolver2char_fill(uplo); zhetrd_(&uploC, &n, A, &lda, D, E, tau, work, &size_w, &info); } // sytrf template <> void cblas_sytrf(hipsolverFillMode_t uplo, int n, float* A, int lda, int* ipiv, float* work, int lwork, int* info) { char uploC = hipsolver2char_fill(uplo); ssytrf_(&uploC, &n, A, &lda, ipiv, work, &lwork, info); } template <> void cblas_sytrf(hipsolverFillMode_t uplo, int n, double* A, int lda, int* ipiv, double* work, int lwork, int* info) { char uploC = hipsolver2char_fill(uplo); dsytrf_(&uploC, &n, A, &lda, ipiv, work, &lwork, info); } template <> void cblas_sytrf(hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int* ipiv, hipsolverComplex* work, int lwork, int* info) { char uploC = hipsolver2char_fill(uplo); csytrf_(&uploC, &n, A, &lda, ipiv, work, &lwork, info); } template <> void cblas_sytrf(hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int* ipiv, hipsolverDoubleComplex* work, int lwork, int* info) { char uploC = hipsolver2char_fill(uplo); zsytrf_(&uploC, &n, A, &lda, ipiv, work, &lwork, info); } hipSOLVER-rocm-5.5.1/clients/common/utility.cpp000066400000000000000000000117111436107207300213370ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include #include "hipsolver.h" #include "utility.hpp" hipsolver_rng_t hipsolver_rng(69069); hipsolver_rng_t hipsolver_seed(hipsolver_rng); template <> char type2char() { return 's'; } template <> char type2char() { return 'd'; } template <> char type2char() { return 'c'; } template <> char type2char() { return 'z'; } template <> int type2int(float val) { return (int)val; } template <> int type2int(double val) { return (int)val; } template <> int type2int(hipsolverComplex val) { return (int)val.real(); } template <> int type2int(hipsolverDoubleComplex val) { return (int)val.real(); } #ifdef __cplusplus extern "C" { #endif /* ============================================================================================ */ /* timing:*/ /* CPU Timer (in microseconds): no GPU synchronization */ double get_time_us_no_sync() { namespace sc = std::chrono; const sc::steady_clock::time_point t = sc::steady_clock::now(); return double(sc::duration_cast(t.time_since_epoch()).count()); } /* CPU Timer (in microseconds): synchronize with the default device and return wall time */ double get_time_us() { hipDeviceSynchronize(); return get_time_us_no_sync(); } /* CPU Timer (in microseconds): synchronize with given queue/stream and return wall time */ double get_time_us_sync(hipStream_t stream) { hipStreamSynchronize(stream); return get_time_us_no_sync(); } /* ============================================================================================ */ /* device query and print out their ID and name; return number of compute-capable devices. 
*/ int query_device_property() { int device_count; hipsolverStatus_t count_status = (hipsolverStatus_t)hipGetDeviceCount(&device_count); if(count_status != HIPSOLVER_STATUS_SUCCESS) { printf("Query device error: cannot get device count \n"); return -1; } else { printf("Query device success: there are %d devices \n", device_count); } for(int i = 0; i < device_count; i++) { hipDeviceProp_t props; hipsolverStatus_t props_status = (hipsolverStatus_t)hipGetDeviceProperties(&props, i); if(props_status != HIPSOLVER_STATUS_SUCCESS) { printf("Query device error: cannot get device ID %d's property\n", i); } else { printf("Device ID %d : %s ------------------------------------------------------\n", i, props.name); printf("with %3.1f GB memory, clock rate %dMHz @ computing capability %d.%d \n", props.totalGlobalMem / 1e9, (int)(props.clockRate / 1000), props.major, props.minor); printf( "maxGridDimX %d, sharedMemPerBlock %3.1f KB, maxThreadsPerBlock %d, warpSize %d\n", props.maxGridSize[0], props.sharedMemPerBlock / 1e3, props.maxThreadsPerBlock, props.warpSize); printf("-------------------------------------------------------------------------\n"); } } return device_count; } /* set current device to device_id */ void set_device(int device_id) { hipsolverStatus_t status = (hipsolverStatus_t)hipSetDevice(device_id); if(status != HIPSOLVER_STATUS_SUCCESS) { printf("Set device error: cannot set device ID %d, there may not be such device ID\n", (int)device_id); } } #ifdef __cplusplus } #endif hipSOLVER-rocm-5.5.1/clients/gtest/000077500000000000000000000000001436107207300167655ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/gtest/CMakeLists.txt000066400000000000000000000115131436107207300215260ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# # ########################################################################

# Build script for the hipsolver-test executable (GoogleTest-based client).
# NOTE(review): several arguments below are a bare `$`; they look like
# generator expressions that lost their `<...>` part -- confirm against the
# original CMakeLists.txt before using this text.

# Linking lapack library requires fortran flags
find_package( cblas REQUIRED CONFIG )

# Only look for an installed hipsolver when the target is not already defined.
if( NOT TARGET hipsolver )
  find_package( hipsolver REQUIRED CONFIG PATHS /opt/rocm/hipsolver )
endif( )

find_package( GTest REQUIRED )

# One gtest source per tested routine, plus the test main.
set(hipsolver_test_source
  hipsolver_gtest_main.cpp
  getrs_gtest.cpp
  getrf_gtest.cpp
  gebrd_gtest.cpp
  gels_gtest.cpp
  geqrf_gtest.cpp
  gesv_gtest.cpp
  gesvd_gtest.cpp
  gesvda_gtest.cpp
  gesvdj_gtest.cpp
  potrf_gtest.cpp
  potri_gtest.cpp
  potrs_gtest.cpp
  syevd_heevd_gtest.cpp
  syevj_heevj_gtest.cpp
  sygvd_hegvd_gtest.cpp
  sygvj_hegvj_gtest.cpp
  syevdx_heevdx_gtest.cpp
  sygvdx_hegvdx_gtest.cpp
  sytrd_hetrd_gtest.cpp
  sytrf_gtest.cpp
  orgbr_ungbr_gtest.cpp
  orgqr_ungqr_gtest.cpp
  orgtr_ungtr_gtest.cpp
  ormqr_unmqr_gtest.cpp
  ormtr_unmtr_gtest.cpp
  )

# Sources shared with the other clients (host LAPACK reference, helpers).
set( hipsolver_test_common
  ../common/lapack_host_reference.cpp
  ../common/hipsolver_datatype2string.cpp
  ../common/utility.cpp
  )

add_executable( hipsolver-test ${hipsolver_test_source} ${hipsolver_test_common} )

# On UNIX the sources listed in hipsolver_f90_source are added to the target.
if( UNIX )
  target_sources( hipsolver-test PRIVATE ${hipsolver_f90_source} )
endif( )

target_include_directories( hipsolver-test
  PRIVATE
    $
  )

set( THREADS_PREFER_PTHREAD_FLAG ON )
find_package( Threads REQUIRED )

add_armor_flags( hipsolver-test "${ARMOR_LEVEL}" )

target_compile_definitions( hipsolver-test PRIVATE GOOGLE_TEST )

# External header includes included as SYSTEM files
target_include_directories( hipsolver-test
  SYSTEM PRIVATE
    $
    $
  )

target_link_libraries( hipsolver-test PRIVATE
  cblas
  lapack
  blas
  GTest::GTest
  Threads::Threads
  roc::hipsolver
  )

if( UNIX )
  target_link_libraries( hipsolver-test PRIVATE hipsolver_fortran_client )
endif( )

target_link_libraries(hipsolver-test PRIVATE $ )

# need mf16c flag for float->half conversion
target_compile_options( hipsolver-test PRIVATE -mf16c )

# Backend selection: HIP link targets when USE_CUDA is not set, otherwise the
# NVCC platform define plus the CUDA libraries.
if( NOT USE_CUDA )
  target_link_libraries( hipsolver-test PRIVATE hip::host )

  if( CUSTOM_TARGET )
    target_link_libraries( hipsolver-test PRIVATE hip::${CUSTOM_TARGET} )
  endif( )

  if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" )
    # hip-clang needs specific flag to turn on pthread and m
    target_link_libraries( hipsolver-test PRIVATE -lpthread -lm )
  endif()
else( )
  target_compile_definitions( hipsolver-test PRIVATE __HIP_PLATFORM_NVCC__ )

  target_include_directories( hipsolver-test
    PRIVATE
      $
    )

  target_link_libraries( hipsolver-test PRIVATE ${CUDA_LIBRARIES} Threads::Threads )
endif( )

set_target_properties( hipsolver-test PROPERTIES DEBUG_POSTFIX "-d" CXX_EXTENSIONS NO )
# The binary is written to the "staging" directory of the build tree.
set_target_properties( hipsolver-test PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )

rocm_install(TARGETS hipsolver-test COMPONENT tests)

# On Windows, copy the globbed third-party files and the rocblas library
# directory next to the test binary after each build.
if(WIN32)
  file(GLOB third_party_dlls
    LIST_DIRECTORIES OFF
    CONFIGURE_DEPENDS
    ${cblas_DIR}/bin/*.dll
    ${GTest_DIR}/bin/*.dll
    $ENV{rocblas_DIR}/bin/*.dll
    $ENV{rocsolver_DIR}/bin/*.dll
    $ENV{HIP_DIR}/bin/*.dll
    $ENV{HIP_DIR}/bin/hipinfo.exe
    ${CMAKE_SOURCE_DIR}/rtest.*
  )
  foreach(file_i ${third_party_dlls})
    add_custom_command(TARGET hipsolver-test POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${PROJECT_BINARY_DIR}/staging/ )
  endforeach()
  add_custom_command(TARGET hipsolver-test POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory $ENV{rocblas_DIR}/bin/rocblas/library ${PROJECT_BINARY_DIR}/staging/library )
endif()
hipSOLVER-rocm-5.5.1/clients/gtest/gebrd_gtest.cpp000066400000000000000000000111101436107207300217540ustar00rootroot00000000000000
/* ************************************************************************
 * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 * * * ************************************************************************ */

// Value-parameterized GoogleTest cases for gebrd.
// NOTE(review): template argument lists appear to be missing throughout this
// text (e.g. `std::tuple, int>`, `vector>`, `template class`, `run_tests()`);
// confirm against the original file before compiling.

#include "testing_gebrd.hpp"

using ::testing::Combine;
using ::testing::TestWithParam;
using ::testing::Values;
using ::testing::ValuesIn;
using namespace std;

// Test parameter: element 0 is a matrix-size entry, element 1 is n.
typedef std::tuple, int> gebrd_tuple;

// each matrix_size_range is a {m, lda}

// case when m = n = -1 will also execute the bad arguments test
// (null handle, null pointers and invalid values)

// for checkin_lapack tests
const vector> matrix_size_range = {
    // invalid
    {-1, 1},
    {20, 5},
    // normal (valid) samples
    {50, 50},
    {70, 100},
    {130, 130},
    {150, 200}};

const vector n_size_range = {
    // invalid
    -1,
    // normal (valid) samples
    16,
    20,
    40,
    50};

// // for daily_lapack tests
// const vector> large_matrix_size_range = {
//     {152, 152},
//     {640, 640},
//     {1000, 1024},
// };

// const vector large_n_size_range = {64, 98, 130, 152};

// Builds the Arguments for one test case: "m" and "lda" come from the
// matrix-size entry, "n" from the second tuple element; timing is disabled.
Arguments gebrd_setup_arguments(gebrd_tuple tup)
{
    vector matrix_size = std::get<0>(tup);
    int n_size = std::get<1>(tup);

    Arguments arg;

    arg.set("m", matrix_size[0]);
    arg.set("n", n_size);
    arg.set("lda", matrix_size[1]);

    // only testing standard use case/defaults for strides

    arg.timing = 0;

    return arg;
}

// Common fixture for the GEBRD test classes below.
template class GEBRD_BASE : public ::TestWithParam
{
protected:
    GEBRD_BASE() {}
    virtual void SetUp() {}
    virtual void TearDown() {}

    // Runs one parameterized case: the bad-argument test is run additionally
    // when both m and n are -1, then the regular test with batch_count = 1.
    template void run_tests()
    {
        Arguments arg = gebrd_setup_arguments(GetParam());

        if(arg.peek("m") == -1 && arg.peek("n") == -1)
            testing_gebrd_bad_arg();

        arg.batch_count = 1;
        testing_gebrd(arg);
    }
};

class GEBRD : public GEBRD_BASE
{
};

class GEBRD_FORTRAN : public GEBRD_BASE
{
};

// non-batch tests

TEST_P(GEBRD, __float)
{
    run_tests();
}

TEST_P(GEBRD, __double)
{
    run_tests();
}

TEST_P(GEBRD, __float_complex)
{
    run_tests();
}

TEST_P(GEBRD, __double_complex)
{
    run_tests();
}

TEST_P(GEBRD_FORTRAN, __float)
{
    run_tests();
}

TEST_P(GEBRD_FORTRAN, __double)
{
    run_tests();
}

TEST_P(GEBRD_FORTRAN, __float_complex)
{
    run_tests();
}

TEST_P(GEBRD_FORTRAN, __double_complex)
{
    run_tests();
}

// Only the checkin_lapack suites are instantiated; the daily_lapack ones are
// commented out together with their size ranges above.

// INSTANTIATE_TEST_SUITE_P(daily_lapack,
//                          GEBRD,
//                          Combine(ValuesIn(large_matrix_size_range), ValuesIn(large_n_size_range)));

INSTANTIATE_TEST_SUITE_P(checkin_lapack,
                         GEBRD,
                         Combine(ValuesIn(matrix_size_range), ValuesIn(n_size_range)));

// INSTANTIATE_TEST_SUITE_P(daily_lapack,
//                          GEBRD_FORTRAN,
//                          Combine(ValuesIn(large_matrix_size_range), ValuesIn(large_n_size_range)));

INSTANTIATE_TEST_SUITE_P(checkin_lapack,
                         GEBRD_FORTRAN,
                         Combine(ValuesIn(matrix_size_range), ValuesIn(n_size_range)));
hipSOLVER-rocm-5.5.1/clients/gtest/gels_gtest.cpp000066400000000000000000000152651436107207300216420ustar00rootroot00000000000000
/* ************************************************************************
 * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * * * ************************************************************************ */

// Value-parameterized GoogleTest cases for gels.
// NOTE(review): template argument lists appear to be missing throughout this
// text (e.g. `typedef std::tuple gels_params_A;`, `template class`,
// `run_tests()`); confirm against the original file before compiling.

#include "testing_gels.hpp"

using ::testing::Combine;
using ::testing::TestWithParam;
using ::testing::Values;
using ::testing::ValuesIn;
using namespace std;

// gels_params_A holds the four A-side sizes read with std::get<0..3> below;
// gels_tuple pairs one such entry (element 0) with nrhs (element 1).
typedef std::tuple gels_params_A;

typedef std::tuple gels_tuple;

// each A_range tuple is a {M, N, lda, ldb};
// each B_range tuple is a {nrhs};

// case when N = nrhs = -1 will also execute the bad arguments test
// (null handle, null pointers and invalid values)

// for checkin_lapack tests
const vector matrix_sizeA_range = {
    // invalid
    {-1, 1, 1, 1},
    {1, -1, 1, 1},
    {10, 10, 10, 1},
    {10, 10, 1, 10},
    // normal (valid) samples
    {20, 20, 20, 20},
    {30, 20, 40, 30},
    {40, 20, 40, 40},
};
const vector matrix_sizeB_range = {
    // invalid
    -1,
    // normal (valid) samples
    10,
    20,
    30};

// // for daily_lapack tests
// const vector large_matrix_sizeA_range = {
//     {75, 25, 75, 75},
//     {150, 150, 150, 150},
// };
// const vector large_matrix_sizeB_range = {
//     100,
//     200,
//     500,
//     1000,
// };

// Builds the Arguments for one test case. "ldb" and "ldx" are both set from
// the fourth A-side value; timing is disabled.
Arguments gels_setup_arguments(gels_tuple tup)
{
    gels_params_A matrix_sizeA = std::get<0>(tup);
    int matrix_sizeB = std::get<1>(tup);

    Arguments arg;

    arg.set("m", std::get<0>(matrix_sizeA));
    arg.set("n", std::get<1>(matrix_sizeA));
    arg.set("lda", std::get<2>(matrix_sizeA));
    arg.set("ldb", std::get<3>(matrix_sizeA));
    arg.set("ldx", std::get<3>(matrix_sizeA));

    arg.set("nrhs", matrix_sizeB);

    // only testing standard use case/defaults for strides

    arg.timing = 0;

    return arg;
}

// Common fixture for the GELS test classes below.
template class GELS_BASE : public ::TestWithParam
{
protected:
    GELS_BASE() {}
    virtual void SetUp() {}
    virtual void TearDown() {}

    // Runs one parameterized case: the bad-argument test is run additionally
    // when both n and nrhs are -1, then the regular test with batch_count = 1.
    template void run_tests()
    {
        Arguments arg = gels_setup_arguments(GetParam());

        if(arg.peek("n") == -1 && arg.peek("nrhs") == -1)
            testing_gels_bad_arg();

        arg.batch_count = 1;
        testing_gels(arg);
    }
};

class GELS : public GELS_BASE
{
};

class GELS_FORTRAN : public GELS_BASE
{
};

class GELS_COMPAT : public GELS_BASE
{
};

class GELS_INPLACE : public GELS_BASE
{
};

// non-batch tests

TEST_P(GELS, __float)
{
    run_tests();
}

TEST_P(GELS, __double)
{
    run_tests();
}

TEST_P(GELS, __float_complex)
{
    run_tests();
}

TEST_P(GELS, __double_complex)
{
    run_tests();
}

TEST_P(GELS_FORTRAN, __float)
{
    run_tests();
}

TEST_P(GELS_FORTRAN, __double)
{
    run_tests();
}

TEST_P(GELS_FORTRAN, __float_complex)
{
    run_tests();
}

TEST_P(GELS_FORTRAN, __double_complex)
{
    run_tests();
}

TEST_P(GELS_COMPAT, __float)
{
    run_tests();
}

TEST_P(GELS_COMPAT, __double)
{
    run_tests();
}

TEST_P(GELS_COMPAT, __float_complex)
{
    run_tests();
}

TEST_P(GELS_COMPAT, __double_complex)
{
    run_tests();
}

TEST_P(GELS_INPLACE, __float)
{
    run_tests();
}

TEST_P(GELS_INPLACE, __double)
{
    run_tests();
}

TEST_P(GELS_INPLACE, __float_complex)
{
    run_tests();
}

TEST_P(GELS_INPLACE, __double_complex)
{
    run_tests();
}

// Only the checkin_lapack suites are instantiated; the daily_lapack ones are
// commented out together with their size ranges above.

// INSTANTIATE_TEST_SUITE_P(daily_lapack,
//                          GELS,
//                          Combine(ValuesIn(large_matrix_sizeA_range),
//                                  ValuesIn(large_matrix_sizeB_range)));

INSTANTIATE_TEST_SUITE_P(checkin_lapack,
                         GELS,
                         Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range)));

// INSTANTIATE_TEST_SUITE_P(daily_lapack,
//                          GELS_FORTRAN,
//                          Combine(ValuesIn(large_matrix_sizeA_range),
//                                  ValuesIn(large_matrix_sizeB_range)));

INSTANTIATE_TEST_SUITE_P(checkin_lapack,
                         GELS_FORTRAN,
                         Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range)));

// INSTANTIATE_TEST_SUITE_P(daily_lapack,
//                          GELS_COMPAT,
//                          Combine(ValuesIn(large_matrix_sizeA_range),
//                                  ValuesIn(large_matrix_sizeB_range)));

INSTANTIATE_TEST_SUITE_P(checkin_lapack,
                         GELS_COMPAT,
                         Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range)));

// INSTANTIATE_TEST_SUITE_P(daily_lapack,
//                          GELS_INPLACE,
//                          Combine(ValuesIn(large_matrix_sizeA_range),
//                                  ValuesIn(large_matrix_sizeB_range)));

INSTANTIATE_TEST_SUITE_P(checkin_lapack,
                         GELS_INPLACE,
                         Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range)));
hipSOLVER-rocm-5.5.1/clients/gtest/geqrf_gtest.cpp000066400000000000000000000111171436107207300220040ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 * * * ************************************************************************ */

// Value-parameterized GoogleTest cases for geqrf.
// NOTE(review): template argument lists appear to be missing throughout this
// text (e.g. `std::tuple, int>`, `vector>`, `template class`, `run_tests()`);
// confirm against the original file before compiling.

#include "testing_geqrf.hpp"

using ::testing::Combine;
using ::testing::TestWithParam;
using ::testing::Values;
using ::testing::ValuesIn;
using namespace std;

// Test parameter: element 0 is a matrix-size entry, element 1 is n.
typedef std::tuple, int> geqrf_tuple;

// each matrix_size_range is a {m, lda}

// case when m = n = -1 will also execute the bad arguments test
// (null handle, null pointers and invalid values)

// for checkin_lapack tests
const vector> matrix_size_range = {
    // invalid
    {-1, 1},
    {20, 5},
    // normal (valid) samples
    {50, 50},
    {70, 100},
    {130, 130},
    {150, 200}};

const vector n_size_range = {
    // invalid
    -1,
    // normal (valid) samples
    16,
    20,
    130,
    150};

// // for daily_lapack tests
// const vector> large_matrix_size_range = {
//     {152, 152},
//     {640, 640},
//     {1000, 1024},
// };

// const vector large_n_size_range = {64, 98, 130, 220, 400};

// Builds the Arguments for one test case: "m" and "lda" come from the
// matrix-size entry, "n" from the second tuple element; timing is disabled.
Arguments geqrf_setup_arguments(geqrf_tuple tup)
{
    vector matrix_size = std::get<0>(tup);
    int n_size = std::get<1>(tup);

    Arguments arg;

    arg.set("m", matrix_size[0]);
    arg.set("n", n_size);
    arg.set("lda", matrix_size[1]);

    // only testing standard use case/defaults for strides

    arg.timing = 0;

    return arg;
}

// Common fixture for the GEQRF test classes below.
template class GEQRF_BASE : public ::TestWithParam
{
protected:
    GEQRF_BASE() {}
    virtual void SetUp() {}
    virtual void TearDown() {}

    // Runs one parameterized case: the bad-argument test is run additionally
    // when both m and n are -1, then the regular test with batch_count = 1.
    template void run_tests()
    {
        Arguments arg = geqrf_setup_arguments(GetParam());

        if(arg.peek("m") == -1 && arg.peek("n") == -1)
            testing_geqrf_bad_arg();

        arg.batch_count = 1;
        testing_geqrf(arg);
    }
};

class GEQRF : public GEQRF_BASE
{
};

class GEQRF_FORTRAN : public GEQRF_BASE
{
};

// non-batch tests

TEST_P(GEQRF, __float)
{
    run_tests();
}

TEST_P(GEQRF, __double)
{
    run_tests();
}

TEST_P(GEQRF, __float_complex)
{
    run_tests();
}

TEST_P(GEQRF, __double_complex)
{
    run_tests();
}

TEST_P(GEQRF_FORTRAN, __float)
{
    run_tests();
}

TEST_P(GEQRF_FORTRAN, __double)
{
    run_tests();
}

TEST_P(GEQRF_FORTRAN, __float_complex)
{
    run_tests();
}

TEST_P(GEQRF_FORTRAN, __double_complex)
{
    run_tests();
}

// Only the checkin_lapack suites are instantiated; the daily_lapack ones are
// commented out together with their size ranges above.

// INSTANTIATE_TEST_SUITE_P(daily_lapack,
//                          GEQRF,
//                          Combine(ValuesIn(large_matrix_size_range), ValuesIn(large_n_size_range)));

INSTANTIATE_TEST_SUITE_P(checkin_lapack,
                         GEQRF,
                         Combine(ValuesIn(matrix_size_range), ValuesIn(n_size_range)));

// INSTANTIATE_TEST_SUITE_P(daily_lapack,
//                          GEQRF_FORTRAN,
//                          Combine(ValuesIn(large_matrix_size_range), ValuesIn(large_n_size_range)));

INSTANTIATE_TEST_SUITE_P(checkin_lapack,
                         GEQRF_FORTRAN,
                         Combine(ValuesIn(matrix_size_range), ValuesIn(n_size_range)));
hipSOLVER-rocm-5.5.1/clients/gtest/gesv_gtest.cpp000066400000000000000000000153011436107207300216430ustar00rootroot00000000000000
/* ************************************************************************
 * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* * * ************************************************************************ */ #include "testing_gesv.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, int> gesv_tuple; // each A_range vector is a {N, lda, ldb/ldx}; // each B_range vector is a {nrhs}; // case when N = nrhs = -1 will also execute the bad arguments test // (null handle, null pointers and invalid values) // for checkin_lapack tests const vector> matrix_sizeA_range = { // invalid {-1, 1, 1}, {10, 2, 10}, {10, 10, 2}, /// normal (valid) samples {20, 20, 20}, {30, 50, 30}, {30, 30, 50}, {50, 60, 60}}; const vector matrix_sizeB_range = { // invalid -1, // normal (valid) samples 10, 20, 30, }; // // for daily_lapack tests // const vector> large_matrix_sizeA_range // = {{70, 70, 100}, {192, 192, 192}, {600, 700, 645}, {1000, 1000, 1000}, {1000, 2000, 2000}}; // const vector large_matrix_sizeB_range = { // 100, // 150, // 200, // 524, // 1000, // }; Arguments gesv_setup_arguments(gesv_tuple tup) { vector matrix_sizeA = std::get<0>(tup); int matrix_sizeB = std::get<1>(tup); Arguments arg; arg.set("n", matrix_sizeA[0]); arg.set("nrhs", matrix_sizeB); arg.set("lda", matrix_sizeA[1]); arg.set("ldb", matrix_sizeA[2]); arg.set("ldx", matrix_sizeA[2]); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class GESV_BASE : public ::TestWithParam { protected: GESV_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = gesv_setup_arguments(GetParam()); if(arg.peek("n") == -1 && arg.peek("nrhs") == -1) testing_gesv_bad_arg(); arg.batch_count = 1; testing_gesv(arg); } }; class GESV : public GESV_BASE { }; class GESV_FORTRAN : public GESV_BASE { }; class GESV_COMPAT : public GESV_BASE { }; class GESV_INPLACE : public GESV_BASE { }; // non-batch tests TEST_P(GESV, __float) { run_tests(); } TEST_P(GESV, __double) { 
run_tests(); } TEST_P(GESV, __float_complex) { run_tests(); } TEST_P(GESV, __double_complex) { run_tests(); } TEST_P(GESV_FORTRAN, __float) { run_tests(); } TEST_P(GESV_FORTRAN, __double) { run_tests(); } TEST_P(GESV_FORTRAN, __float_complex) { run_tests(); } TEST_P(GESV_FORTRAN, __double_complex) { run_tests(); } TEST_P(GESV_COMPAT, __float) { run_tests(); } TEST_P(GESV_COMPAT, __double) { run_tests(); } TEST_P(GESV_COMPAT, __float_complex) { run_tests(); } TEST_P(GESV_COMPAT, __double_complex) { run_tests(); } #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) TEST_P(GESV_INPLACE, __float) { run_tests(); } TEST_P(GESV_INPLACE, __double) { run_tests(); } TEST_P(GESV_INPLACE, __float_complex) { run_tests(); } TEST_P(GESV_INPLACE, __double_complex) { run_tests(); } #endif // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESV, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESV, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESV_FORTRAN, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESV_FORTRAN, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESV_COMPAT, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESV_COMPAT, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESV_INPLACE, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESV_INPLACE, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); #endif 
hipSOLVER-rocm-5.5.1/clients/gtest/gesvd_gtest.cpp000066400000000000000000000202161436107207300220100ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_gesvd.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> gesvd_tuple; // each size_range vector is a {m, n, fa}; // if fa = 0 then no fast algorithm is allowed // if fa = 1 fast algorithm is used when possible // each opt_range vector is a {lda, ldu, ldv, leftsv, rightsv}; // if ldx = -1 then ldx < limit (invalid size) // if ldx = 0 then ldx = limit // if ldx = 1 then ldx > limit // if leftsv (rightsv) = 0 then overwrite singular vectors // if leftsv (rightsv) = 1 then compute singular vectors // if leftsv (rightsv) = 2 then compute all orthogonal matrix // if leftsv (rightsv) = 3 then no singular vectors are computed // case when m = -1, n = 1, and rightsv = leftsv = 3 will also execute the bad // arguments test (null handle, null pointers and invalid values) // for checkin_lapack tests const vector> size_range = { // invalid {-1, 1, 0}, {1, -1, 0}, // normal (valid) samples {1, 1, 0}, {20, 20, 0}, {40, 30, 0}, {60, 30, 0}}; const vector> opt_range = { // invalid {-1, 0, 0, 2, 2}, {0, -1, 0, 1, 2}, {0, 0, -1, 2, 1}, {0, 0, 0, 0, 0}, // normal (valid) samples {1, 1, 1, 3, 3}, {0, 0, 1, 3, 2}, {0, 1, 0, 3, 1}, {0, 1, 1, 3, 0}, {1, 0, 0, 2, 3}, {1, 0, 1, 2, 2}, {1, 1, 0, 2, 1}, {0, 0, 0, 2, 0}, {0, 0, 0, 1, 3}, {0, 0, 0, 1, 2}, {0, 0, 0, 1, 1}, {0, 0, 0, 1, 0}, {0, 0, 0, 0, 3}, {0, 0, 0, 0, 2}, {0, 0, 0, 0, 1}}; // // for daily_lapack tests // const vector> large_size_range = {{120, 100, 0}, {300, 120, 0}}; // const vector> large_opt_range = {{0, 0, 0, 3, 3}, // {1, 0, 0, 0, 1}, // {0, 1, 0, 1, 0}, // {0, 0, 1, 1, 1}, // {0, 0, 0, 3, 0}, // {0, 0, 0, 1, 3}, // {0, 0, 0, 3, 2}}; Arguments gesvd_setup_arguments(gesvd_tuple tup) { vector size = std::get<0>(tup); vector opt = std::get<1>(tup); Arguments arg; // sizes rocblas_int m = size[0]; rocblas_int n = 
size[1]; arg.set("m", m); arg.set("n", n); // // fast algorithm // if(size[2] == 0) // arg.set("fast_alg", 'I'); // else // arg.set("fast_alg", 'O'); // leading dimensions arg.set("lda", m + opt[0] * 10); arg.set("ldu", m + opt[1] * 10); if(opt[4] == 2) arg.set("ldv", n + opt[2] * 10); else arg.set("ldv", min(m, n) + opt[2] * 10); // vector options if(opt[3] == 0) arg.set("jobu", 'O'); else if(opt[3] == 1) arg.set("jobu", 'S'); else if(opt[3] == 2) arg.set("jobu", 'A'); else arg.set("jobu", 'N'); if(opt[4] == 0) arg.set("jobv", 'O'); else if(opt[4] == 1) arg.set("jobv", 'S'); else if(opt[4] == 2) arg.set("jobv", 'A'); else arg.set("jobv", 'N'); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class GESVD_BASE : public ::TestWithParam { protected: GESVD_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = gesvd_setup_arguments(GetParam()); if(arg.peek("m") == -1 && arg.peek("n") == 1 && arg.peek("jobu") == 'N' && arg.peek("jobv") == 'N') testing_gesvd_bad_arg(); arg.batch_count = 1; testing_gesvd(arg); } }; class GESVD : public GESVD_BASE { }; class GESVD_FORTRAN : public GESVD_BASE { }; class GESVD_COMPAT : public GESVD_BASE { }; class GESVD_NRWK : public GESVD_BASE { }; // non-batch tests TEST_P(GESVD, __float) { run_tests(); } TEST_P(GESVD, __double) { run_tests(); } TEST_P(GESVD, __float_complex) { run_tests(); } TEST_P(GESVD, __double_complex) { run_tests(); } TEST_P(GESVD_FORTRAN, __float) { run_tests(); } TEST_P(GESVD_FORTRAN, __double) { run_tests(); } TEST_P(GESVD_FORTRAN, __float_complex) { run_tests(); } TEST_P(GESVD_FORTRAN, __double_complex) { run_tests(); } TEST_P(GESVD_COMPAT, __float) { run_tests(); } TEST_P(GESVD_COMPAT, __double) { run_tests(); } TEST_P(GESVD_COMPAT, __float_complex) { run_tests(); } TEST_P(GESVD_COMPAT, __double_complex) { run_tests(); } TEST_P(GESVD_NRWK, __float) { run_tests(); } TEST_P(GESVD_NRWK, __double) { run_tests(); } 
TEST_P(GESVD_NRWK, __float_complex) { run_tests(); } TEST_P(GESVD_NRWK, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESVD, // Combine(ValuesIn(large_size_range), ValuesIn(large_opt_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESVD, Combine(ValuesIn(size_range), ValuesIn(opt_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESVD_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(large_opt_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESVD_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(opt_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESVD_COMPAT, // Combine(ValuesIn(large_size_range), ValuesIn(large_opt_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESVD_COMPAT, Combine(ValuesIn(size_range), ValuesIn(opt_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESVD_NRWK, // Combine(ValuesIn(large_size_range), ValuesIn(large_opt_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESVD_NRWK, Combine(ValuesIn(size_range), ValuesIn(opt_range))); hipSOLVER-rocm-5.5.1/clients/gtest/gesvda_gtest.cpp000066400000000000000000000073361436107207300221610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. 
* * ************************************************************************ */ #include "testing_gesvda.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> gesvda_tuple; // each size_range vector is a {m, n, lda, ldu, ldv}; // if ldx = -1 then ldx < limit (invalid size) // if ldx = 0 then ldx = limit // if ldx = 1 then ldx > limit // each opt_range vector is a {vect, rank}; // if vect = 1 then compute singular vectors // if vect = 0 then no singular vectors are computed // case when m = n = 0, vect = 0 and rank = 1 will also execute the bad // arguments test (null handle, null pointers and invalid values) // for checkin_lapack tests const vector> size_range = { {20, 20, 0, 0, 0}, {40, 30, 0, 0, 0}, {30, 30, 1, 0, 0}, {60, 40, 0, 1, 0}, {50, 50, 1, 1, 1}}; const vector> opt_range = { {0, 5}, {0, 15}, {1, 5}, {1, 20}, }; // // for daily_lapack tests // const vector> large_size_range // = {{100, 100, 1, 0, 0}, {300, 120, 0, 0, 1}, {200, 300, 0, 0, 0}}; // const vector> large_opt_range = {{0, 100}, {1, 10}, {1, 20}}; Arguments gesvda_setup_arguments(gesvda_tuple tup) { vector size = std::get<0>(tup); vector opt = std::get<1>(tup); Arguments arg; // sizes rocblas_int m = size[0]; rocblas_int n = size[1]; arg.set("m", m); arg.set("n", n); // leading dimensions arg.set("lda", m + size[2] * 10); arg.set("ldu", m + size[3] * 10); arg.set("ldv", min(m, n) + size[4] * 10); // vector options if(opt[0] == 0) arg.set("jobz", 'N'); else arg.set("jobz", 'V'); // ranges arg.set("rank", opt[1]); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class GESVDA_BASE : public ::TestWithParam { protected: GESVDA_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = gesvda_setup_arguments(GetParam()); if(arg.peek("m") == 0 && arg.peek("n") == 0 && arg.peek("jobz") == 'N' && 
arg.peek("rank") == 1) testing_gesvda_bad_arg(); arg.batch_count = (BATCHED || STRIDED ? 3 : 1); testing_gesvda(arg); } }; class GESVDA_COMPAT : public GESVDA_BASE { }; // strided_batched tests TEST_P(GESVDA_COMPAT, strided_batched__float) { run_tests(); } TEST_P(GESVDA_COMPAT, strided_batched__double) { run_tests(); } TEST_P(GESVDA_COMPAT, strided_batched__float_complex) { run_tests(); } TEST_P(GESVDA_COMPAT, strided_batched__double_complex) { run_tests(); } // // daily_lapack tests normal execution with medium to large sizes // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESVDA_COMPAT, // Combine(ValuesIn(large_size_range), ValuesIn(large_opt_range))); // checkin_lapack tests normal execution with small sizes, invalid sizes, // quick returns, and corner cases INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESVDA_COMPAT, Combine(ValuesIn(size_range), ValuesIn(opt_range))); hipSOLVER-rocm-5.5.1/clients/gtest/gesvdj_gtest.cpp000066400000000000000000000152131436107207300221630ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #include "testing_gesvdj.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> gesvdj_tuple; // each size_range vector is a {m, n, fa}; // if fa = 0 then no fast algorithm is allowed // if fa = 1 fast algorithm is used when possible // each opt_range vector is a {lda, ldu, ldv, jobz, econ}; // if ldx = -1 then ldx < limit (invalid size) // if ldx = 0 then ldx = limit // if ldx = 1 then ldx > limit // if jobz = 0 then no singular vectors are computed // if jobz = 1 then compute singular vectors // case when m = 1, n = 1, jobz = 3, and econ = 0 will also execute the bad // arguments test (null handle, null pointers and invalid values) // for checkin_lapack tests const vector> size_range = { // normal (valid) samples {1, 1, 0}, {20, 20, 0}, {30, 30, 0}, {32, 30, 0}, {4, 32, 0}, {32, 4, 0}, }; const vector> opt_range = { // normal (valid) samples {1, 1, 1, 0, 0}, {0, 0, 1, 0, 0}, {0, 1, 0, 0, 0}, {1, 0, 0, 1, 1}, {1, 0, 1, 1, 0}, {1, 1, 0, 1, 0}, {0, 0, 0, 1, 1}, }; // // for daily_lapack tests // const vector> large_size_range = {{120, 100, 0}, {300, 120, 0}}; // const vector> large_opt_range = {{0, 0, 0, 0, 0}, // {0, 0, 1, 1, 1}, // {0, 1, 0, 1, 0}, // {1, 0, 0, 0, 0}}; template Arguments gesvdj_setup_arguments(gesvdj_tuple tup, bool STRIDED) { vector size = std::get<0>(tup); vector opt = std::get<1>(tup); Arguments arg; // sizes rocblas_int m = size[0]; rocblas_int n = size[1]; arg.set("m", m); arg.set("n", n); // leading dimensions arg.set("lda", m + opt[0] * 10); arg.set("ldu", m + opt[1] * 10); arg.set("ldv", n 
+ opt[2] * 10); // vector options if(opt[3] == 0) arg.set("jobz", 'N'); else arg.set("jobz", 'V'); if(!STRIDED) arg.set("econ", opt[4]); arg.set("tolerance", 2 * get_epsilon()); arg.set("max_sweeps", 100); arg.set("sort_eig", 1); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class GESVDJ_BASE : public ::TestWithParam { protected: GESVDJ_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = gesvdj_setup_arguments(GetParam(), STRIDED); if(arg.peek("m") == 1 && arg.peek("n") == 1 && arg.peek("jobz") == 'N' && (STRIDED || arg.peek("econ") == 0)) testing_gesvdj_bad_arg(); arg.batch_count = (BATCHED || STRIDED ? 3 : 1); testing_gesvdj(arg); } }; class GESVDJ : public GESVDJ_BASE { }; class GESVDJ_FORTRAN : public GESVDJ_BASE { }; // non-batch tests TEST_P(GESVDJ, __float) { run_tests(); } TEST_P(GESVDJ, __double) { run_tests(); } TEST_P(GESVDJ, __float_complex) { run_tests(); } TEST_P(GESVDJ, __double_complex) { run_tests(); } TEST_P(GESVDJ_FORTRAN, __float) { run_tests(); } TEST_P(GESVDJ_FORTRAN, __double) { run_tests(); } TEST_P(GESVDJ_FORTRAN, __float_complex) { run_tests(); } TEST_P(GESVDJ_FORTRAN, __double_complex) { run_tests(); } // strided_batched tests TEST_P(GESVDJ, strided_batched__float) { run_tests(); } TEST_P(GESVDJ, strided_batched__double) { run_tests(); } TEST_P(GESVDJ, strided_batched__float_complex) { run_tests(); } TEST_P(GESVDJ, strided_batched__double_complex) { run_tests(); } TEST_P(GESVDJ_FORTRAN, strided_batched__float) { run_tests(); } TEST_P(GESVDJ_FORTRAN, strided_batched__double) { run_tests(); } TEST_P(GESVDJ_FORTRAN, strided_batched__float_complex) { run_tests(); } TEST_P(GESVDJ_FORTRAN, strided_batched__double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESVDJ, // Combine(ValuesIn(large_size_range), ValuesIn(large_opt_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESVDJ, Combine(ValuesIn(size_range), 
ValuesIn(opt_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GESVDJ_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(large_opt_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GESVDJ_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(opt_range))); hipSOLVER-rocm-5.5.1/clients/gtest/getrf_gtest.cpp000066400000000000000000000141351436107207300220120ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_getrf.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, int> getrf_tuple; // each matrix_size_range vector is a {m, lda} // case when m = -1 and n = -1 will also execute the bad arguments test // (null handle, null pointers and invalid values) // for checkin_lapack tests const vector> matrix_size_range = { // invalid {-1, 1}, {20, 5}, // normal (valid) samples {32, 32}, {50, 50}, {70, 100}, }; const vector n_size_range = { // invalid -1, // normal (valid) samples 16, 20, 40, 100, }; // // for daily_lapack tests // const vector> large_matrix_size_range = { // {192, 192}, // {640, 640}, // {1000, 1024}, // }; // const vector large_n_size_range = { // 45, // 64, // 520, // 1024, // 2000, // }; Arguments getrf_setup_arguments(getrf_tuple tup) { vector matrix_size = std::get<0>(tup); int n_size = std::get<1>(tup); Arguments arg; arg.set("m", matrix_size[0]); arg.set("lda", matrix_size[1]); arg.set("n", n_size); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class GETRF_BASE : public ::TestWithParam { protected: GETRF_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = getrf_setup_arguments(GetParam()); if(arg.peek("m") == -1 && arg.peek("n") == -1) testing_getrf_bad_arg(); arg.batch_count = 1; testing_getrf(arg); } }; class GETRF : public GETRF_BASE { }; class GETRF_FORTRAN : public GETRF_BASE { }; class GETRF_COMPAT : public GETRF_BASE { }; class GETRF_NPVT : public GETRF_BASE { }; // non-batch tests TEST_P(GETRF, __float) { run_tests(); } TEST_P(GETRF, __double) { run_tests(); } TEST_P(GETRF, __float_complex) { run_tests(); } TEST_P(GETRF, __double_complex) { run_tests(); } TEST_P(GETRF_FORTRAN, __float) { run_tests(); } TEST_P(GETRF_FORTRAN, __double) { run_tests(); 
} TEST_P(GETRF_FORTRAN, __float_complex) { run_tests(); } TEST_P(GETRF_FORTRAN, __double_complex) { run_tests(); } TEST_P(GETRF_COMPAT, __float) { run_tests(); } TEST_P(GETRF_COMPAT, __double) { run_tests(); } TEST_P(GETRF_COMPAT, __float_complex) { run_tests(); } TEST_P(GETRF_COMPAT, __double_complex) { run_tests(); } TEST_P(GETRF_NPVT, __float) { run_tests(); } TEST_P(GETRF_NPVT, __double) { run_tests(); } TEST_P(GETRF_NPVT, __float_complex) { run_tests(); } TEST_P(GETRF_NPVT, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GETRF, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(large_n_size_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GETRF, Combine(ValuesIn(matrix_size_range), ValuesIn(n_size_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GETRF_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(large_n_size_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GETRF_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(n_size_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GETRF_COMPAT, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(large_n_size_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GETRF_COMPAT, Combine(ValuesIn(matrix_size_range), ValuesIn(n_size_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GETRF_NPVT, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(large_n_size_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GETRF_NPVT, Combine(ValuesIn(matrix_size_range), ValuesIn(n_size_range))); hipSOLVER-rocm-5.5.1/clients/gtest/getrs_gtest.cpp000066400000000000000000000140631436107207300220270ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_getrs.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> getrs_tuple; // each A_range vector is a {N, lda, ldb}; // each B_range vector is a {nrhs, trans}; // if trans = 0 then no transpose // if trans = 1 then transpose // if trans = 2 then conjugate transpose // case when N = nrhs = -1 will also execute the bad arguments test // (null handle, null pointers and invalid values) // for checkin_lapack tests const vector> matrix_sizeA_range = { // invalid {-1, 1, 1}, {10, 2, 10}, {10, 10, 2}, /// normal (valid) samples {20, 20, 20}, {30, 50, 30}, {30, 30, 50}, {50, 60, 60}}; const vector> matrix_sizeB_range = { // invalid {-1, 0}, // normal (valid) samples {10, 0}, {20, 1}, {30, 2}, }; // // for daily_lapack tests // const vector> large_matrix_sizeA_range // = {{70, 70, 100}, {192, 192, 192}, {600, 700, 645}, {1000, 1000, 1000}, {1000, 2000, 2000}}; // const vector> large_matrix_sizeB_range = { // {100, 0}, // {150, 0}, // {200, 1}, // {524, 2}, // {1000, 2}, // }; Arguments getrs_setup_arguments(getrs_tuple tup) { vector matrix_sizeA = std::get<0>(tup); vector matrix_sizeB = std::get<1>(tup); Arguments arg; arg.set("n", matrix_sizeA[0]); arg.set("nrhs", matrix_sizeB[0]); arg.set("lda", matrix_sizeA[1]); arg.set("ldb", matrix_sizeA[2]); if(matrix_sizeB[1] == 0) arg.set("trans", 'N'); else if(matrix_sizeB[1] == 1) arg.set("trans", 'T'); else arg.set("trans", 'C'); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class GETRS_BASE : public ::TestWithParam { protected: GETRS_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = getrs_setup_arguments(GetParam()); if(arg.peek("n") == -1 && arg.peek("nrhs") == -1) testing_getrs_bad_arg(); arg.batch_count = 1; 
testing_getrs(arg); } }; class GETRS : public GETRS_BASE { }; class GETRS_FORTRAN : public GETRS_BASE { }; class GETRS_COMPAT : public GETRS_BASE { }; // non-batch tests TEST_P(GETRS, __float) { run_tests(); } TEST_P(GETRS, __double) { run_tests(); } TEST_P(GETRS, __float_complex) { run_tests(); } TEST_P(GETRS, __double_complex) { run_tests(); } TEST_P(GETRS_FORTRAN, __float) { run_tests(); } TEST_P(GETRS_FORTRAN, __double) { run_tests(); } TEST_P(GETRS_FORTRAN, __float_complex) { run_tests(); } TEST_P(GETRS_FORTRAN, __double_complex) { run_tests(); } TEST_P(GETRS_COMPAT, __float) { run_tests(); } TEST_P(GETRS_COMPAT, __double) { run_tests(); } TEST_P(GETRS_COMPAT, __float_complex) { run_tests(); } TEST_P(GETRS_COMPAT, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GETRS, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GETRS, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GETRS_FORTRAN, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GETRS_FORTRAN, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // GETRS_COMPAT, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, GETRS_COMPAT, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); hipSOLVER-rocm-5.5.1/clients/gtest/hipsolver_gtest_main.cpp000066400000000000000000000046611436107207300237250ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #include #include #include #include #include #include "utility.hpp" #define STRINGIFY(s) STRINGIFY_HELPER(s) #define STRINGIFY_HELPER(s) #s static void print_version_info() { // clang-format off std::cout << "hipSOLVER version " STRINGIFY(hipsolverVersionMajor) "." STRINGIFY(hipsolverVersionMinor) "." STRINGIFY(hipsolverVersionPatch) "." 
STRINGIFY(hipsolverVersionTweak) << std::endl; // clang-format on } /* ===================================================================== Main function: =================================================================== */ int main(int argc, char** argv) { print_version_info(); // print device info int device_count = query_device_property(); if(device_count <= 0) { std::cerr << "Error: No devices found" << std::endl; return EXIT_FAILURE; } set_device(0); // use first device ::testing::InitGoogleTest(&argc, argv); int status = RUN_ALL_TESTS(); print_version_info(); // redundant, but convenient when tests fail return status; } hipSOLVER-rocm-5.5.1/clients/gtest/orgbr_ungbr_gtest.cpp000066400000000000000000000135171436107207300232160ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_orgbr_ungbr.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> orgbr_tuple; // each size_range is a {M, N, K}; // each store_range vector is a {lda, side} // if lda = -1, then lda < limit (invalid size) // if lda = 0, then lda = limit // if lda = 1, then lda > limit // if st = 0, then side = 'L' // if st = 1, then side = 'R' // case when m = -1, n = 1 and side = 'L' will also execute the bad arguments // test (null handle, null pointers and invalid values) const vector> store_range = { // always invalid {-1, 0}, {-1, 1}, // normal (valid) samples {0, 0}, {0, 1}, {1, 0}, {1, 1}}; // for checkin_lapack tests const vector> size_range = { // always invalid {-1, 1, 1}, {1, -1, 1}, {1, 1, -1}, // invalid for side = 'L' {10, 30, 5}, // invalid for side = 'R' {30, 10, 5}, // always invalid {30, 10, 20}, {10, 30, 20}, // normal (valid) samples {30, 30, 1}, {20, 20, 20}, {50, 50, 50}, {100, 100, 50}}; // // for daily_lapack tests // const vector> large_size_range = {{150, 150, 100}, // {270, 270, 270}, // {400, 400, 400}, // {800, 800, 300}, // {1000, 1000, 1000}, // {1500, 1500, 800}}; Arguments orgbr_setup_arguments(orgbr_tuple tup) { vector size = std::get<0>(tup); vector store = std::get<1>(tup); Arguments arg; arg.set("m", size[0]); arg.set("n", size[1]); arg.set("k", size[2]); arg.set("lda", size[0] + store[0] * 10); arg.set("side", store[1] == 1 ? 
'R' : 'L'); arg.timing = 0; return arg; } template class ORGBR_UNGBR : public ::TestWithParam { protected: ORGBR_UNGBR() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = orgbr_setup_arguments(GetParam()); if(arg.peek("m") == -1 && arg.peek("n") == 1 && arg.get("side") == 'L') testing_orgbr_ungbr_bad_arg(); testing_orgbr_ungbr(arg); } }; class ORGBR : public ORGBR_UNGBR { }; class UNGBR : public ORGBR_UNGBR { }; class ORGBR_FORTRAN : public ORGBR_UNGBR { }; class UNGBR_FORTRAN : public ORGBR_UNGBR { }; // non-batch tests TEST_P(ORGBR, __float) { run_tests(); } TEST_P(ORGBR, __double) { run_tests(); } TEST_P(UNGBR, __float_complex) { run_tests(); } TEST_P(UNGBR, __double_complex) { run_tests(); } TEST_P(ORGBR_FORTRAN, __float) { run_tests(); } TEST_P(ORGBR_FORTRAN, __double) { run_tests(); } TEST_P(UNGBR_FORTRAN, __float_complex) { run_tests(); } TEST_P(UNGBR_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORGBR, // Combine(ValuesIn(large_size_range), ValuesIn(store_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORGBR, Combine(ValuesIn(size_range), ValuesIn(store_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNGBR, // Combine(ValuesIn(large_size_range), ValuesIn(store_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNGBR, Combine(ValuesIn(size_range), ValuesIn(store_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORGBR_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(store_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORGBR_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(store_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNGBR_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(store_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNGBR_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(store_range))); 
hipSOLVER-rocm-5.5.1/clients/gtest/orgqr_ungqr_gtest.cpp000066400000000000000000000124661436107207300232560ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_orgqr_ungqr.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> orgqr_tuple; // each m_size_range vector is a {M, lda} // each n_size_range vector is a {N, K} // case when m = -1 and n = -1 will also execute the bad arguments test // (null handle, null pointers and invalid values) // for checkin_lapack tests const vector> m_size_range = { // always invalid {-1, 1}, {20, 5}, // invalid for case * {50, 50}, // normal (valid) samples {70, 100}, {130, 130}}; const vector> n_size_range = { // always invalid {-1, 1}, {1, -1}, {10, 20}, // invalid for case * {55, 55}, // normal (valid) samples {10, 10}, {20, 20}, {35, 25}}; // // for daily_lapack tests // const vector> large_m_size_range = {{400, 410}, {640, 640}, {1000, 1024}, {2000, 2000}}; // const vector> large_n_size_range // = {{164, 162}, {198, 140}, {130, 130}, {220, 220}, {400, 200}}; Arguments orgqr_setup_arguments(orgqr_tuple tup) { vector m_size = std::get<0>(tup); vector n_size = std::get<1>(tup); Arguments arg; arg.set("m", m_size[0]); arg.set("lda", m_size[1]); arg.set("n", n_size[0]); arg.set("k", n_size[1]); arg.timing = 0; return arg; } template class ORGQR_UNGQR : public ::TestWithParam { protected: ORGQR_UNGQR() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = orgqr_setup_arguments(GetParam()); if(arg.peek("m") == -1 && arg.peek("n") == -1) testing_orgqr_ungqr_bad_arg(); testing_orgqr_ungqr(arg); } }; class ORGQR : public ORGQR_UNGQR { }; class UNGQR : public ORGQR_UNGQR { }; class ORGQR_FORTRAN : public ORGQR_UNGQR { }; class UNGQR_FORTRAN : public ORGQR_UNGQR { }; // non-batch tests TEST_P(ORGQR, __float) { run_tests(); } TEST_P(ORGQR, __double) { run_tests(); } TEST_P(UNGQR, __float_complex) { run_tests(); } TEST_P(UNGQR, __double_complex) { 
run_tests(); } TEST_P(ORGQR_FORTRAN, __float) { run_tests(); } TEST_P(ORGQR_FORTRAN, __double) { run_tests(); } TEST_P(UNGQR_FORTRAN, __float_complex) { run_tests(); } TEST_P(UNGQR_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORGQR, // Combine(ValuesIn(large_m_size_range), ValuesIn(large_n_size_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORGQR, Combine(ValuesIn(m_size_range), ValuesIn(n_size_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNGQR, // Combine(ValuesIn(large_m_size_range), ValuesIn(large_n_size_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNGQR, Combine(ValuesIn(m_size_range), ValuesIn(n_size_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORGQR_FORTRAN, // Combine(ValuesIn(large_m_size_range), ValuesIn(large_n_size_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORGQR_FORTRAN, Combine(ValuesIn(m_size_range), ValuesIn(n_size_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNGQR_FORTRAN, // Combine(ValuesIn(large_m_size_range), ValuesIn(large_n_size_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNGQR_FORTRAN, Combine(ValuesIn(m_size_range), ValuesIn(n_size_range))); hipSOLVER-rocm-5.5.1/clients/gtest/orgtr_ungtr_gtest.cpp000066400000000000000000000113561436107207300232610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_orgtr_ungtr.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, int> orgtr_tuple; // each size_range vector is a {n, lda} // case when n = -1 and uplo = 'U' will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector uplo_range = {0, 1}; // for checkin_lapack tests const vector> size_range = { // invalid {-1, 1}, {20, 5}, // normal (valid) samples {32, 32}, {50, 50}, {70, 100}, {100, 150}}; // // for daily_lapack tests // const vector> large_size_range = {{192, 192}, {500, 600}, {640, 640}, {1000, 1024}}; Arguments orgtr_setup_arguments(orgtr_tuple tup) { vector size = std::get<0>(tup); int uplo = std::get<1>(tup); Arguments arg; arg.set("n", size[0]); arg.set("lda", size[1]); arg.set("uplo", uplo == 1 ? 'U' : 'L'); arg.timing = 0; return arg; } template class ORGTR_UNGTR : public ::TestWithParam { protected: ORGTR_UNGTR() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = orgtr_setup_arguments(GetParam()); if(arg.peek("n") == -1 && arg.peek("uplo") == 'U') testing_orgtr_ungtr_bad_arg(); testing_orgtr_ungtr(arg); } }; class ORGTR : public ORGTR_UNGTR { }; class UNGTR : public ORGTR_UNGTR { }; class ORGTR_FORTRAN : public ORGTR_UNGTR { }; class UNGTR_FORTRAN : public ORGTR_UNGTR { }; // non-batch tests TEST_P(ORGTR, __float) { run_tests(); } TEST_P(ORGTR, __double) { run_tests(); } TEST_P(UNGTR, __float_complex) { run_tests(); } TEST_P(UNGTR, __double_complex) { run_tests(); } TEST_P(ORGTR_FORTRAN, __float) { run_tests(); } TEST_P(ORGTR_FORTRAN, __double) { run_tests(); } TEST_P(UNGTR_FORTRAN, __float_complex) { run_tests(); } TEST_P(UNGTR_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, ORGTR, Combine(ValuesIn(large_size_range), 
ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORGTR, Combine(ValuesIn(size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, UNGTR, Combine(ValuesIn(large_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNGTR, Combine(ValuesIn(size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORGTR_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORGTR_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNGTR_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNGTR_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(uplo_range))); hipSOLVER-rocm-5.5.1/clients/gtest/ormqr_unmqr_gtest.cpp000066400000000000000000000136621436107207300232710ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #include "testing_ormqr_unmqr.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> ormqr_tuple; // each size_range vector is a {M, N, K} // each op_range vector is a {lda, ldc, s, t} // if lda = -1, then lda < limit (invalid size) // if lda = 0, then lda = limit // if lda = 1, then lda > limit // if ldc = -1, then ldc < limit (invalid size) // if ldc = 0, then ldc = limit // if ldc = 1, then ldc > limit // if s = 0, then side = 'L' // if s = 1, then side = 'R' // if t = 0, then trans = 'N' // if t = 1, then trans = 'T' // if t = 2, then trans = 'C' // case when m = -1, side = L and trans = T will also execute the bad arguments // test (null handle, null pointers and invalid values) const vector> op_range = { // invalid {-1, 0, 0, 0}, {0, -1, 0, 0}, // normal (valid) samples {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 2}, {0, 0, 1, 0}, {0, 0, 1, 1}, {0, 0, 1, 2}, {1, 1, 0, 0}}; // for checkin_lapack tests const vector> size_range = { // always invalid {-1, 1, 1}, {1, -1, 1}, {1, 1, -1}, // invalid for side = 'R' {20, 10, 20}, // invalid for side = 'L' {15, 25, 25}, // normal (valid) samples {40, 40, 40}, {45, 40, 30}, {50, 50, 20}}; // // for daily_lapack tests // const vector> large_size_range // = {{100, 100, 100}, {150, 100, 80}, {300, 400, 300}, {1024, 1000, 950}, {1500, 1500, 1000}}; Arguments ormqr_setup_arguments(ormqr_tuple tup) { vector size = std::get<0>(tup); vector op = std::get<1>(tup); Arguments arg; rocblas_int m = size[0]; rocblas_int n = size[1]; rocblas_int k = size[2]; arg.set("m", m); arg.set("n", n); 
arg.set("k", k); if(op[2] == 0) arg.set("lda", m + op[0] * 10); else arg.set("lda", n + op[0] * 10); arg.set("ldc", m + op[1] * 10); arg.set("side", op[2] == 0 ? 'L' : 'R'); arg.set("trans", (op[3] == 0 ? 'N' : (op[3] == 1 ? 'T' : 'C'))); arg.timing = 0; return arg; } template class ORMQR_UNMQR : public ::TestWithParam { protected: ORMQR_UNMQR() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = ormqr_setup_arguments(GetParam()); if(arg.peek("m") == -1 && arg.peek("side") == 'L' && arg.peek("trans") == 'T') testing_ormqr_unmqr_bad_arg(); testing_ormqr_unmqr(arg); } }; class ORMQR : public ORMQR_UNMQR { }; class UNMQR : public ORMQR_UNMQR { }; class ORMQR_FORTRAN : public ORMQR_UNMQR { }; class UNMQR_FORTRAN : public ORMQR_UNMQR { }; // non-batch tests TEST_P(ORMQR, __float) { run_tests(); } TEST_P(ORMQR, __double) { run_tests(); } TEST_P(UNMQR, __float_complex) { run_tests(); } TEST_P(UNMQR, __double_complex) { run_tests(); } TEST_P(ORMQR_FORTRAN, __float) { run_tests(); } TEST_P(ORMQR_FORTRAN, __double) { run_tests(); } TEST_P(UNMQR_FORTRAN, __float_complex) { run_tests(); } TEST_P(UNMQR_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORMQR, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORMQR, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNMQR, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNMQR, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORMQR_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORMQR_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNMQR_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); 
INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNMQR_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(op_range))); hipSOLVER-rocm-5.5.1/clients/gtest/ormtr_unmtr_gtest.cpp000066400000000000000000000143731436107207300232770ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_ormtr_unmtr.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> ormtr_tuple; // each size_range vector is a {M, N} // each store_range vector is a {lda, ldc, s, t, u} // if lda = -1, then lda < limit (invalid size) // if lda = 0, then lda = limit // if lda = 1, then lda > limit // if ldc = -1, then ldc < limit (invalid size) // if ldc = 0, then ldc = limit // if ldc = 1, then ldc > limit // if s = 0, then side = 'L' // if s = 1, then side = 'R' // if t = 0, then trans = 'N' // if t = 1, then trans = 'T' // if t = 2, then trans = 'C' // if u = 0, then uplo = 'U' // if u = 1, then uplo = 'L' // case when m = -1, n = 1, side = 'L', trans = 'T' and uplo = 'U' // will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector> store_range = { // invalid {-1, 0, 0, 0, 0}, {0, -1, 0, 0, 0}, // normal (valid) samples {1, 1, 0, 0, 0}, {1, 1, 0, 0, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 1}, {0, 0, 0, 1, 0}, {0, 0, 0, 1, 1}, {0, 0, 0, 2, 0}, {0, 0, 0, 2, 1}, {0, 0, 1, 0, 0}, {0, 0, 1, 0, 1}, {0, 0, 1, 1, 0}, {0, 0, 1, 1, 1}, {0, 0, 1, 2, 0}, {0, 0, 1, 2, 1}, }; // for checkin_lapack tests const vector> size_range = { // invalid {-1, 1}, {1, -1}, // normal (valid) samples {10, 30}, {20, 5}, {20, 20}, {50, 50}, {70, 40}, }; // // for daily_lapack tests // const vector> large_size_range = { // {200, 150}, // {270, 270}, // {400, 400}, // {800, 500}, // {1500, 1000}, // }; Arguments ormtr_setup_arguments(ormtr_tuple tup) { vector size = std::get<0>(tup); vector store = std::get<1>(tup); Arguments arg; rocblas_int m = size[0]; rocblas_int n = size[1]; arg.set("m", m); arg.set("n", n); int nq = store[2] == 0 ? m : n; arg.set("lda", nq + store[0] * 10); arg.set("ldc", m + store[1] * 10); arg.set("side", store[2] == 0 ? 
'L' : 'R'); arg.set("trans", (store[3] == 0 ? 'N' : (store[3] == 1 ? 'T' : 'C'))); arg.set("uplo", store[4] == 0 ? 'U' : 'L'); arg.timing = 0; return arg; } template class ORMTR_UNMTR : public ::TestWithParam { protected: ORMTR_UNMTR() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = ormtr_setup_arguments(GetParam()); if(arg.peek("m") == -1 && arg.peek("n") == 1 && arg.peek("side") == 'L' && arg.peek("trans") == 'T' && arg.peek("uplo") == 'U') testing_ormtr_unmtr_bad_arg(); testing_ormtr_unmtr(arg); } }; class ORMTR : public ORMTR_UNMTR { }; class UNMTR : public ORMTR_UNMTR { }; class ORMTR_FORTRAN : public ORMTR_UNMTR { }; class UNMTR_FORTRAN : public ORMTR_UNMTR { }; // non-batch tests TEST_P(ORMTR, __float) { run_tests(); } TEST_P(ORMTR, __double) { run_tests(); } TEST_P(UNMTR, __float_complex) { run_tests(); } TEST_P(UNMTR, __double_complex) { run_tests(); } TEST_P(ORMTR_FORTRAN, __float) { run_tests(); } TEST_P(ORMTR_FORTRAN, __double) { run_tests(); } TEST_P(UNMTR_FORTRAN, __float_complex) { run_tests(); } TEST_P(UNMTR_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORMTR, // Combine(ValuesIn(large_size_range), ValuesIn(store_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORMTR, Combine(ValuesIn(size_range), ValuesIn(store_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNMTR, // Combine(ValuesIn(large_size_range), ValuesIn(store_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNMTR, Combine(ValuesIn(size_range), ValuesIn(store_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // ORMTR_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(store_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, ORMTR_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(store_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // UNMTR_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(store_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, UNMTR_FORTRAN, 
Combine(ValuesIn(size_range), ValuesIn(store_range))); hipSOLVER-rocm-5.5.1/clients/gtest/potrf_gtest.cpp000066400000000000000000000146151436107207300220400ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_potrf.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, char> potrf_tuple; // each size_range vector is a {N, lda} // if singular = 1, then the used matrix for the tests is not positive definite // each uplo_range is a {uplo} // case when n = -1 and uplo = L will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector uplo_range = {'L', 'U'}; // for checkin_lapack tests const vector> matrix_size_range = { // invalid {-1, 1}, {10, 2}, // normal (valid) samples {10, 10}, {20, 30}, {50, 50}, {70, 80}}; // // for daily_lapack tests // const vector> large_matrix_size_range = { // {192, 192}, // {640, 960}, // {1000, 1000}, // {1024, 1024}, // {2000, 2000}, // }; Arguments potrf_setup_arguments(potrf_tuple tup) { vector matrix_size = std::get<0>(tup); char uplo = std::get<1>(tup); Arguments arg; arg.set("n", matrix_size[0]); arg.set("lda", matrix_size[1]); arg.set("uplo", uplo); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class POTRF_BASE : public ::TestWithParam { protected: POTRF_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = potrf_setup_arguments(GetParam()); if(arg.peek("uplo") == 'L' && arg.peek("n") == -1) testing_potrf_bad_arg(); arg.batch_count = (BATCHED || STRIDED ? 
3 : 1); testing_potrf(arg); } }; class POTRF : public POTRF_BASE { }; class POTRF_FORTRAN : public POTRF_BASE { }; class POTRF_COMPAT : public POTRF_BASE { }; // non-batch tests TEST_P(POTRF, __float) { run_tests(); } TEST_P(POTRF, __double) { run_tests(); } TEST_P(POTRF, __float_complex) { run_tests(); } TEST_P(POTRF, __double_complex) { run_tests(); } TEST_P(POTRF_FORTRAN, __float) { run_tests(); } TEST_P(POTRF_FORTRAN, __double) { run_tests(); } TEST_P(POTRF_FORTRAN, __float_complex) { run_tests(); } TEST_P(POTRF_FORTRAN, __double_complex) { run_tests(); } TEST_P(POTRF_COMPAT, __float) { run_tests(); } TEST_P(POTRF_COMPAT, __double) { run_tests(); } TEST_P(POTRF_COMPAT, __float_complex) { run_tests(); } TEST_P(POTRF_COMPAT, __double_complex) { run_tests(); } // batched tests TEST_P(POTRF, batched__float) { run_tests(); } TEST_P(POTRF, batched__double) { run_tests(); } TEST_P(POTRF, batched__float_complex) { run_tests(); } TEST_P(POTRF, batched__double_complex) { run_tests(); } TEST_P(POTRF_FORTRAN, batched__float) { run_tests(); } TEST_P(POTRF_FORTRAN, batched__double) { run_tests(); } TEST_P(POTRF_FORTRAN, batched__float_complex) { run_tests(); } TEST_P(POTRF_FORTRAN, batched__double_complex) { run_tests(); } TEST_P(POTRF_COMPAT, batched__float) { run_tests(); } TEST_P(POTRF_COMPAT, batched__double) { run_tests(); } TEST_P(POTRF_COMPAT, batched__float_complex) { run_tests(); } TEST_P(POTRF_COMPAT, batched__double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // POTRF, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, POTRF, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // POTRF_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, POTRF_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // 
POTRF_COMPAT, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, POTRF_COMPAT, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); hipSOLVER-rocm-5.5.1/clients/gtest/potri_gtest.cpp000066400000000000000000000107441436107207300220420ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_potri.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, char> potri_tuple; // each matrix_size_range vector is a {n, lda} // each uplo_range is a {uplo} // case when n = -1 and uplo = L will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector uplo_range = {'L', 'U'}; // for checkin_lapack tests const vector> matrix_size_range = { // invalid {-1, 1}, {20, 5}, // normal (valid) samples {32, 32}, {50, 50}, {70, 100}, {100, 150}}; // // for daily_lapack tests // const vector> large_matrix_size_range // = {{192, 192, 1}, {500, 600, 1}, {640, 640, 0}, {1000, 1024, 0}, {1200, 1230, 0}}; Arguments potri_setup_arguments(potri_tuple tup) { vector matrix_size = std::get<0>(tup); char uplo = std::get<1>(tup); Arguments arg; arg.set("n", matrix_size[0]); arg.set("lda", matrix_size[1]); arg.set("uplo", uplo); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class POTRI_BASE : public ::TestWithParam { protected: POTRI_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = potri_setup_arguments(GetParam()); if(arg.peek("uplo") == 'L' && arg.peek("n") == -1) testing_potri_bad_arg(); arg.batch_count = 1; testing_potri(arg); } }; class POTRI : public POTRI_BASE { }; class POTRI_FORTRAN : public POTRI_BASE { }; // non-batch tests TEST_P(POTRI, __float) { run_tests(); } TEST_P(POTRI, __double) { run_tests(); } TEST_P(POTRI, __float_complex) { run_tests(); } TEST_P(POTRI, __double_complex) { run_tests(); } TEST_P(POTRI_FORTRAN, __float) { run_tests(); } TEST_P(POTRI_FORTRAN, __double) { run_tests(); } TEST_P(POTRI_FORTRAN, __float_complex) { run_tests(); } TEST_P(POTRI_FORTRAN, __double_complex) { run_tests(); } // 
INSTANTIATE_TEST_SUITE_P(daily_lapack, // POTRI, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, POTRI, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // POTRI_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, POTRI_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); hipSOLVER-rocm-5.5.1/clients/gtest/potrs_gtest.cpp000066400000000000000000000157171436107207300220610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_potrs.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> potrs_tuple; // each A_range vector is a {N, lda, ldb}; // each B_range vector is a {nrhs, uplo}; // if uplo = 0 then upper // if uplo = 1 then lower // case when N = nrhs = -1 will also execute the bad arguments test // (null handle, null pointers and invalid values) // for checkin_lapack tests const vector> matrix_sizeA_range = { // invalid {-1, 1, 1}, {10, 2, 10}, {10, 10, 2}, /// normal (valid) samples {20, 20, 20}, {30, 50, 30}, {30, 30, 50}, {50, 60, 60}}; const vector> matrix_sizeB_range = { // invalid {-1, 0}, // normal (valid) samples {1, 0}, {1, 1}, }; // // for daily_lapack tests // const vector> large_matrix_sizeA_range // = {{70, 70, 100}, {192, 192, 192}, {600, 700, 645}, {1000, 1000, 1000}, {1000, 2000, 2000}}; // const vector> large_matrix_sizeB_range = { // {1, 0}, // {1, 1}, // }; Arguments potrs_setup_arguments(potrs_tuple tup) { vector matrix_sizeA = std::get<0>(tup); vector matrix_sizeB = std::get<1>(tup); Arguments arg; arg.set("n", matrix_sizeA[0]); arg.set("nrhs", matrix_sizeB[0]); arg.set("lda", matrix_sizeA[1]); arg.set("ldb", matrix_sizeA[2]); if(matrix_sizeB[1] == 0) arg.set("uplo", 'U'); else arg.set("uplo", 'L'); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class POTRS_BASE : public ::TestWithParam { protected: POTRS_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = potrs_setup_arguments(GetParam()); if(arg.peek("n") == -1 && arg.peek("nrhs") == -1) testing_potrs_bad_arg(); arg.batch_count = (BATCHED || STRIDED ? 
3 : 1); testing_potrs(arg); } }; class POTRS : public POTRS_BASE { }; class POTRS_FORTRAN : public POTRS_BASE { }; class POTRS_COMPAT : public POTRS_BASE { }; // non-batch tests TEST_P(POTRS, __float) { run_tests(); } TEST_P(POTRS, __double) { run_tests(); } TEST_P(POTRS, __float_complex) { run_tests(); } TEST_P(POTRS, __double_complex) { run_tests(); } TEST_P(POTRS_FORTRAN, __float) { run_tests(); } TEST_P(POTRS_FORTRAN, __double) { run_tests(); } TEST_P(POTRS_FORTRAN, __float_complex) { run_tests(); } TEST_P(POTRS_FORTRAN, __double_complex) { run_tests(); } TEST_P(POTRS_COMPAT, __float) { run_tests(); } TEST_P(POTRS_COMPAT, __double) { run_tests(); } TEST_P(POTRS_COMPAT, __float_complex) { run_tests(); } TEST_P(POTRS_COMPAT, __double_complex) { run_tests(); } // batched tests TEST_P(POTRS, batched__float) { run_tests(); } TEST_P(POTRS, batched__double) { run_tests(); } TEST_P(POTRS, batched__float_complex) { run_tests(); } TEST_P(POTRS, batched__double_complex) { run_tests(); } TEST_P(POTRS_FORTRAN, batched__float) { run_tests(); } TEST_P(POTRS_FORTRAN, batched__double) { run_tests(); } TEST_P(POTRS_FORTRAN, batched__float_complex) { run_tests(); } TEST_P(POTRS_FORTRAN, batched__double_complex) { run_tests(); } TEST_P(POTRS_COMPAT, batched__float) { run_tests(); } TEST_P(POTRS_COMPAT, batched__double) { run_tests(); } TEST_P(POTRS_COMPAT, batched__float_complex) { run_tests(); } TEST_P(POTRS_COMPAT, batched__double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // POTRS, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, POTRS, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // POTRS_FORTRAN, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, POTRS_FORTRAN, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); 
// INSTANTIATE_TEST_SUITE_P(daily_lapack, // POTRS_COMPAT, // Combine(ValuesIn(large_matrix_sizeA_range), // ValuesIn(large_matrix_sizeB_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, POTRS_COMPAT, Combine(ValuesIn(matrix_sizeA_range), ValuesIn(matrix_sizeB_range))); hipSOLVER-rocm-5.5.1/clients/gtest/syevd_heevd_gtest.cpp000066400000000000000000000122741436107207300232120ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_syevd_heevd.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> syevd_heevd_tuple; // each size_range vector is a {n, lda} // each op_range vector is a {jobz, uplo} // case when n == -1, jobz == N, and uplo = L will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector> op_range = {{'N', 'L'}, {'N', 'U'}, {'V', 'L'}, {'V', 'U'}}; // for checkin_lapack tests const vector> size_range = { // invalid {-1, 1}, {10, 5}, // normal (valid) samples {1, 1}, {12, 12}, {20, 30}, {35, 35}, {50, 60}}; // // for daily_lapack tests // const vector> large_size_range = {{192, 192}, {256, 270}, {300, 300}}; Arguments syevd_heevd_setup_arguments(syevd_heevd_tuple tup) { vector size = std::get<0>(tup); vector op = std::get<1>(tup); Arguments arg; arg.set("n", size[0]); arg.set("lda", size[1]); arg.set("jobz", op[0]); arg.set("uplo", op[1]); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class SYEVD_HEEVD : public ::TestWithParam { protected: SYEVD_HEEVD() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = syevd_heevd_setup_arguments(GetParam()); if(arg.peek("n") == -1 && arg.peek("jobz") == 'N' && arg.peek("uplo") == 'L') testing_syevd_heevd_bad_arg(); arg.batch_count = 1; testing_syevd_heevd(arg); } }; class SYEVD : public SYEVD_HEEVD { }; class HEEVD : public SYEVD_HEEVD { }; class SYEVD_FORTRAN : public SYEVD_HEEVD { }; class HEEVD_FORTRAN : public SYEVD_HEEVD { }; // non-batch tests TEST_P(SYEVD, __float) { run_tests(); } TEST_P(SYEVD, __double) { run_tests(); } TEST_P(HEEVD, __float_complex) { run_tests(); } TEST_P(HEEVD, __double_complex) { run_tests(); } TEST_P(SYEVD_FORTRAN, __float) { run_tests(); } TEST_P(SYEVD_FORTRAN, __double) 
{ run_tests(); } TEST_P(HEEVD_FORTRAN, __float_complex) { run_tests(); } TEST_P(HEEVD_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYEVD, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYEVD, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEEVD, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEEVD, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYEVD_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYEVD_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEEVD_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEEVD_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(op_range))); hipSOLVER-rocm-5.5.1/clients/gtest/syevdx_heevdx_gtest.cpp000066400000000000000000000114511436107207300235660ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #include "testing_syevdx_heevdx.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> syevdx_heevdx_tuple; // each size_range vector is a {n, lda, vl, vu, il, iu} // each op_range vector is a {jobz, range, uplo} // case when n == 1, jobz == N, range == V, uplo = L will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector> op_range = {{'N', 'V', 'L'}, {'V', 'A', 'U'}, {'V', 'V', 'L'}, {'V', 'I', 'U'}}; // for checkin_lapack tests const vector> size_range = { // invalid {-1, 1, 0, 10, 1, 1}, {10, 10, 0, 10, 1, 1}, // normal (valid) samples {1, 1, 0, 10, 1, 1}, {12, 12, -20, 20, 10, 12}, {20, 30, 5, 15, 1, 20}, {35, 35, -10, 10, 1, 15}, {50, 60, -15, -5, 20, 30}}; // // for daily_lapack tests // const vector> large_size_range // = {{192, 192, 5, 15, 100, 170}, {256, 270, -10, 10, 1, 256}, {300, 300, -15, -5, 200, 300}}; template Arguments syevdx_heevdx_setup_arguments(syevdx_heevdx_tuple tup) { using S = decltype(std::real(T{})); vector size = std::get<0>(tup); vector op = std::get<1>(tup); Arguments arg; arg.set("n", size[0]); arg.set("lda", size[1]); arg.set("vl", size[2]); arg.set("vu", size[3]); arg.set("il", size[4]); arg.set("iu", size[5]); arg.set("jobz", op[0]); arg.set("range", op[1]); arg.set("uplo", op[2]); // only testing standard use case/defaults for 
strides arg.timing = 0; return arg; } template class SYEVDX_HEEVDX : public ::TestWithParam { protected: SYEVDX_HEEVDX() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = syevdx_heevdx_setup_arguments(GetParam()); if(arg.peek("n") == 1 && arg.peek("jobz") == 'N' && arg.peek("range") == 'V' && arg.peek("uplo") == 'L') testing_syevdx_heevdx_bad_arg(); arg.batch_count = 1; testing_syevdx_heevdx(arg); } }; class SYEVDX_COMPAT : public SYEVDX_HEEVDX { }; class HEEVDX_COMPAT : public SYEVDX_HEEVDX { }; // non-batch tests TEST_P(SYEVDX_COMPAT, __float) { run_tests(); } TEST_P(SYEVDX_COMPAT, __double) { run_tests(); } TEST_P(HEEVDX_COMPAT, __float_complex) { run_tests(); } TEST_P(HEEVDX_COMPAT, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYEVDX_COMPAT, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYEVDX_COMPAT, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEEVDX_COMPAT, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEEVDX_COMPAT, Combine(ValuesIn(size_range), ValuesIn(op_range))); hipSOLVER-rocm-5.5.1/clients/gtest/syevj_heevj_gtest.cpp000066400000000000000000000142071436107207300232240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_syevj_heevj.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> syevj_heevj_tuple; // each size_range vector is a {n, lda} // each op_range vector is a {jobz, uplo} // case when n == 1, jobz == N, and uplo = L will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector> op_range = {{'N', 'L'}, {'N', 'U'}, {'V', 'L'}, {'V', 'U'}}; // for checkin_lapack tests const vector> size_range = { // normal (valid) samples {1, 1}, {12, 12}, {20, 30}, {35, 35}, {50, 60}}; // // for daily_lapack tests // const vector> large_size_range = {{192, 192}, {256, 270}, {300, 300}}; template Arguments syevj_heevj_setup_arguments(syevj_heevj_tuple tup) { vector size = std::get<0>(tup); vector op = std::get<1>(tup); Arguments arg; arg.set("n", size[0]); arg.set("lda", size[1]); arg.set("jobz", op[0]); arg.set("uplo", op[1]); arg.set("tolerance", 2 * get_epsilon()); arg.set("max_sweeps", 100); arg.set("sort_eig", 1); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class SYEVJ_HEEVJ : public ::TestWithParam { protected: SYEVJ_HEEVJ() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = syevj_heevj_setup_arguments(GetParam()); if(arg.peek("n") == 1 && arg.peek("jobz") == 'N' && arg.peek("uplo") == 'L') testing_syevj_heevj_bad_arg(); arg.batch_count = (BATCHED || STRIDED ? 
3 : 1); testing_syevj_heevj(arg); } }; class SYEVJ : public SYEVJ_HEEVJ { }; class HEEVJ : public SYEVJ_HEEVJ { }; class SYEVJ_FORTRAN : public SYEVJ_HEEVJ { }; class HEEVJ_FORTRAN : public SYEVJ_HEEVJ { }; // non-batch tests TEST_P(SYEVJ, __float) { run_tests(); } TEST_P(SYEVJ, __double) { run_tests(); } TEST_P(HEEVJ, __float_complex) { run_tests(); } TEST_P(HEEVJ, __double_complex) { run_tests(); } TEST_P(SYEVJ_FORTRAN, __float) { run_tests(); } TEST_P(SYEVJ_FORTRAN, __double) { run_tests(); } TEST_P(HEEVJ_FORTRAN, __float_complex) { run_tests(); } TEST_P(HEEVJ_FORTRAN, __double_complex) { run_tests(); } // strided_batched tests TEST_P(SYEVJ, strided_batched__float) { run_tests(); } TEST_P(SYEVJ, strided_batched__double) { run_tests(); } TEST_P(HEEVJ, strided_batched__float_complex) { run_tests(); } TEST_P(HEEVJ, strided_batched__double_complex) { run_tests(); } TEST_P(SYEVJ_FORTRAN, strided_batched__float) { run_tests(); } TEST_P(SYEVJ_FORTRAN, strided_batched__double) { run_tests(); } TEST_P(HEEVJ_FORTRAN, strided_batched__float_complex) { run_tests(); } TEST_P(HEEVJ_FORTRAN, strided_batched__double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYEVJ, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYEVJ, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEEVJ, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEEVJ, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYEVJ_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYEVJ_FORTRAN, Combine(ValuesIn(size_range), ValuesIn(op_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEEVJ_FORTRAN, // Combine(ValuesIn(large_size_range), ValuesIn(op_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEEVJ_FORTRAN, 
Combine(ValuesIn(size_range), ValuesIn(op_range))); hipSOLVER-rocm-5.5.1/clients/gtest/sygvd_hegvd_gtest.cpp000066400000000000000000000134511436107207300232140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_sygvd_hegvd.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> sygvd_tuple; // each matrix_size_range is a {n, lda, ldb} // each type_range is a {itype, jobz, uplo} // case when n = -1, itype = 1, jobz = 'N', and uplo = U will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector> type_range = {{'1', 'N', 'U'}, {'2', 'N', 'L'}, {'3', 'N', 'U'}, {'1', 'V', 'L'}, {'2', 'V', 'U'}, {'3', 'V', 'L'}}; // for checkin_lapack tests const vector> matrix_size_range = { // invalid {-1, 1, 1}, {20, 5, 5}, // normal (valid) samples {20, 30, 20}, {35, 35, 35}, {50, 50, 60}}; // // for daily_lapack tests // const vector> large_matrix_size_range = { // {192, 192, 192}, // {256, 270, 256}, // {300, 300, 310}, // }; Arguments sygvd_setup_arguments(sygvd_tuple tup) { vector matrix_size = std::get<0>(tup); vector type = std::get<1>(tup); Arguments arg; arg.set("n", matrix_size[0]); arg.set("lda", matrix_size[1]); arg.set("ldb", matrix_size[2]); arg.set("itype", type[0]); arg.set("jobz", type[1]); arg.set("uplo", type[2]); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class SYGVD_HEGVD : public ::TestWithParam { protected: SYGVD_HEGVD() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = sygvd_setup_arguments(GetParam()); if(arg.peek("itype") == '1' && arg.peek("jobz") == 'N' && arg.peek("uplo") == 'U' && arg.peek("n") == -1) testing_sygvd_hegvd_bad_arg(); arg.batch_count = 1; testing_sygvd_hegvd(arg); } }; class SYGVD : public SYGVD_HEGVD { }; class HEGVD : public SYGVD_HEGVD { }; class SYGVD_FORTRAN : public SYGVD_HEGVD { }; class HEGVD_FORTRAN : public SYGVD_HEGVD { }; // non-batch tests TEST_P(SYGVD, __float) { run_tests(); } 
TEST_P(SYGVD, __double) { run_tests(); } TEST_P(HEGVD, __float_complex) { run_tests(); } TEST_P(HEGVD, __double_complex) { run_tests(); } TEST_P(SYGVD_FORTRAN, __float) { run_tests(); } TEST_P(SYGVD_FORTRAN, __double) { run_tests(); } TEST_P(HEGVD_FORTRAN, __float_complex) { run_tests(); } TEST_P(HEGVD_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYGVD, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYGVD, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEGVD, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEGVD, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYGVD_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYGVD_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEGVD_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEGVD_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); hipSOLVER-rocm-5.5.1/clients/gtest/sygvdx_hegvdx_gtest.cpp000066400000000000000000000125331436107207300235740ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_sygvdx_hegvdx.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> sygvdx_tuple; // each matrix_size_range is a {n, lda, ldb, vl, vu, il, iu} // each type_range is a {itype, jobz, range, uplo} // case when n = -1, itype = 1, jobz = 'N', range = 'A', and uplo = U will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector> type_range = {{'1', 'N', 'A', 'U'}, {'2', 'N', 'V', 'L'}, {'3', 'N', 'I', 'U'}, {'1', 'V', 'V', 'L'}, {'2', 'V', 'I', 'U'}, {'3', 'V', 'A', 'L'}}; // for checkin_lapack tests const vector> matrix_size_range = { // invalid {-1, 1, 1, 0, 10, 1, 1}, {20, 5, 5, 0, 10, 1, 1}, // valid only when erange=A {20, 20, 20, 10, 0, 10, 1}, // normal (valid) samples {20, 30, 20, 5, 15, 1, 10}, {35, 35, 35, -10, 10, 1, 35}, {50, 50, 60, -15, -5, 25, 50}, }; // // for daily_lapack tests // const vector> large_matrix_size_range = { // {192, 192, 192, 5, 15, 100, 150}, // {256, 270, 256, -10, 10, 1, 100}, // {300, 300, 310, -15, -5, 200, 300}, // }; template Arguments sygvdx_setup_arguments(sygvdx_tuple tup) { using S = decltype(std::real(T{})); vector matrix_size = std::get<0>(tup); vector type = std::get<1>(tup); Arguments arg; arg.set("n", matrix_size[0]); arg.set("lda", matrix_size[1]); arg.set("ldb", matrix_size[2]); arg.set("vl", matrix_size[3]); arg.set("vu", matrix_size[4]); arg.set("il", matrix_size[5]); arg.set("iu", matrix_size[6]); arg.set("itype", type[0]); arg.set("jobz", type[1]); arg.set("range", type[2]); arg.set("uplo", type[3]); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class SYGVDX_HEGVDX : public ::TestWithParam { protected: SYGVDX_HEGVDX() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = 
sygvdx_setup_arguments(GetParam()); if(arg.peek("itype") == '1' && arg.peek("jobz") == 'N' && arg.peek("range") == 'A' && arg.peek("uplo") == 'U' && arg.peek("n") == -1) testing_sygvdx_hegvdx_bad_arg(); arg.batch_count = 1; testing_sygvdx_hegvdx(arg); } }; class SYGVDX_COMPAT : public SYGVDX_HEGVDX { }; class HEGVDX_COMPAT : public SYGVDX_HEGVDX { }; // non-batch tests TEST_P(SYGVDX_COMPAT, __float) { run_tests(); } TEST_P(SYGVDX_COMPAT, __double) { run_tests(); } TEST_P(HEGVDX_COMPAT, __float_complex) { run_tests(); } TEST_P(HEGVDX_COMPAT, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYGVDX_COMPAT, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYGVDX_COMPAT, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEGVDX_COMPAT, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEGVDX_COMPAT, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); hipSOLVER-rocm-5.5.1/clients/gtest/sygvj_hegvj_gtest.cpp000066400000000000000000000137421436107207300232330ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #include "testing_sygvj_hegvj.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, vector> sygvj_tuple; // each matrix_size_range is a {n, lda, ldb} // each type_range is a {itype, jobz, uplo} // case when n = -1, itype = 1, jobz = 'N', and uplo = U will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector> type_range = {{'1', 'N', 'U'}, {'2', 'N', 'L'}, {'3', 'N', 'U'}, {'1', 'V', 'L'}, {'2', 'V', 'U'}, {'3', 'V', 'L'}}; // for checkin_lapack tests const vector> matrix_size_range = { // invalid {-1, 1, 1}, {20, 5, 5}, // normal (valid) samples {20, 30, 20}, {35, 35, 35}, {50, 50, 60}}; // // for daily_lapack tests // const vector> large_matrix_size_range = { // {192, 192, 192}, // {256, 270, 256}, // {300, 300, 310}, // }; template Arguments sygvj_setup_arguments(sygvj_tuple tup) { vector matrix_size = std::get<0>(tup); vector type = std::get<1>(tup); Arguments arg; arg.set("n", matrix_size[0]); arg.set("lda", matrix_size[1]); arg.set("ldb", matrix_size[2]); arg.set("itype", type[0]); arg.set("jobz", type[1]); arg.set("uplo", type[2]); arg.set("tolerance", 2 * get_epsilon()); arg.set("max_sweeps", 100); arg.set("sort_eig", 1); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class SYGVJ_HEGVJ : public ::TestWithParam { 
protected: SYGVJ_HEGVJ() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = sygvj_setup_arguments(GetParam()); if(arg.peek("itype") == '1' && arg.peek("jobz") == 'N' && arg.peek("uplo") == 'U' && arg.peek("n") == -1) testing_sygvj_hegvj_bad_arg(); arg.batch_count = 1; testing_sygvj_hegvj(arg); } }; class SYGVJ : public SYGVJ_HEGVJ { }; class HEGVJ : public SYGVJ_HEGVJ { }; class SYGVJ_FORTRAN : public SYGVJ_HEGVJ { }; class HEGVJ_FORTRAN : public SYGVJ_HEGVJ { }; // non-batch tests TEST_P(SYGVJ, __float) { run_tests(); } TEST_P(SYGVJ, __double) { run_tests(); } TEST_P(HEGVJ, __float_complex) { run_tests(); } TEST_P(HEGVJ, __double_complex) { run_tests(); } TEST_P(SYGVJ_FORTRAN, __float) { run_tests(); } TEST_P(SYGVJ_FORTRAN, __double) { run_tests(); } TEST_P(HEGVJ_FORTRAN, __float_complex) { run_tests(); } TEST_P(HEGVJ_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYGVJ, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYGVJ, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEGVJ, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEGVJ, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYGVJ_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYGVJ_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HEGVJ_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(type_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HEGVJ_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(type_range))); 
hipSOLVER-rocm-5.5.1/clients/gtest/sytrd_hetrd_gtest.cpp000066400000000000000000000123151436107207300232340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "testing_sytrd_hetrd.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, char> sytrd_tuple; // each matrix_size_range is a {n, lda} // case when n = -1 and uplo = U will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector uplo_range = {'L', 'U'}; // for checkin_lapack tests const vector> matrix_size_range = { // invalid {-1, 1}, {20, 5}, // normal (valid) samples {50, 50}, {70, 100}, {130, 130}, {150, 200}}; // // for daily_lapack tests // const vector> large_matrix_size_range = { // {152, 152}, // {640, 640}, // {1000, 1024}, // }; Arguments sytrd_setup_arguments(sytrd_tuple tup) { vector matrix_size = std::get<0>(tup); char uplo = std::get<1>(tup); Arguments arg; arg.set("n", matrix_size[0]); arg.set("lda", matrix_size[1]); arg.set("uplo", uplo); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class SYTRD_HETRD : public ::TestWithParam { protected: SYTRD_HETRD() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = sytrd_setup_arguments(GetParam()); if(arg.peek("uplo") == 'U' && arg.peek("n") == -1) testing_sytrd_hetrd_bad_arg(); arg.batch_count = 1; testing_sytrd_hetrd(arg); } }; class SYTRD : public SYTRD_HETRD { }; class HETRD : public SYTRD_HETRD { }; class SYTRD_FORTRAN : public SYTRD_HETRD { }; class HETRD_FORTRAN : public SYTRD_HETRD { }; // non-batch tests TEST_P(SYTRD, __float) { run_tests(); } TEST_P(SYTRD, __double) { run_tests(); } TEST_P(HETRD, __float_complex) { run_tests(); } TEST_P(HETRD, __double_complex) { run_tests(); } TEST_P(SYTRD_FORTRAN, __float) { run_tests(); } TEST_P(SYTRD_FORTRAN, __double) { run_tests(); } TEST_P(HETRD_FORTRAN, __float_complex) { run_tests(); } TEST_P(HETRD_FORTRAN, __double_complex) { 
run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYTRD, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYTRD, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HETRD, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HETRD, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYTRD_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYTRD_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // HETRD_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, HETRD_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); hipSOLVER-rocm-5.5.1/clients/gtest/sytrf_gtest.cpp000066400000000000000000000110111436107207300220400ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #include "testing_sytrf.hpp" using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; using ::testing::ValuesIn; using namespace std; typedef std::tuple, char> sytrf_tuple; // each matrix_size_range vector is a {n, lda} // each uplo_range is a {uplo} // case when n = -1 and uplo = L will also execute the bad arguments test // (null handle, null pointers and invalid values) const vector uplo_range = {'L', 'U'}; // for checkin_lapack tests const vector> matrix_size_range = { #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // invalid {-1, 1}, {20, 5}, #endif // normal (valid) samples {32, 32}, {50, 50}, {70, 100}}; // // for daily_lapack tests // const vector> large_matrix_size_range = { // {192, 192}, // {640, 640}, // {1000, 1024}, // }; Arguments sytrf_setup_arguments(sytrf_tuple tup) { vector matrix_size = std::get<0>(tup); char uplo = std::get<1>(tup); Arguments arg; arg.set("n", matrix_size[0]); arg.set("lda", matrix_size[1]); arg.set("uplo", uplo); // only testing standard use case/defaults for strides arg.timing = 0; return arg; } template class SYTRF_BASE : public ::TestWithParam { protected: SYTRF_BASE() {} virtual void SetUp() {} virtual void TearDown() {} template void run_tests() { Arguments arg = sytrf_setup_arguments(GetParam()); if(arg.peek("uplo") == 'L' && arg.peek("n") == -1) testing_sytrf_bad_arg(); arg.batch_count = 1; testing_sytrf(arg); } }; class 
SYTRF : public SYTRF_BASE { }; class SYTRF_FORTRAN : public SYTRF_BASE { }; // non-batch tests TEST_P(SYTRF, __float) { run_tests(); } TEST_P(SYTRF, __double) { run_tests(); } TEST_P(SYTRF, __float_complex) { run_tests(); } TEST_P(SYTRF, __double_complex) { run_tests(); } TEST_P(SYTRF_FORTRAN, __float) { run_tests(); } TEST_P(SYTRF_FORTRAN, __double) { run_tests(); } TEST_P(SYTRF_FORTRAN, __float_complex) { run_tests(); } TEST_P(SYTRF_FORTRAN, __double_complex) { run_tests(); } // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYTRF, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYTRF, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); // INSTANTIATE_TEST_SUITE_P(daily_lapack, // SYTRF_FORTRAN, // Combine(ValuesIn(large_matrix_size_range), ValuesIn(uplo_range))); INSTANTIATE_TEST_SUITE_P(checkin_lapack, SYTRF_FORTRAN, Combine(ValuesIn(matrix_size_range), ValuesIn(uplo_range))); hipSOLVER-rocm-5.5.1/clients/include/000077500000000000000000000000001436107207300172625ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/include/clientcommon.hpp000066400000000000000000000031051436107207300224610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "../rocblascommon/rocblas_vector.hpp" #include "../rocsolvercommon/norm.hpp" #include "../rocsolvercommon/rocsolver_arguments.hpp" #include "../rocsolvercommon/rocsolver_test.hpp" #include "hipsolver.hpp" #include "lapack_host_reference.hpp" using namespace std; hipSOLVER-rocm-5.5.1/clients/include/complex.hpp000066400000000000000000000165101436107207300214450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "hipsolver.h" #include typedef struct hipsolverComplex { private: float x, y; public: hipsolverComplex() = default; hipsolverComplex(float r, float i = 0) : x(r) , y(i) { } float real() const { return x; } float imag() const { return y; } void real(float r) { x = r; } void imag(float i) { y = i; } } hipsolverComplex; typedef struct hipsolverDoubleComplex { private: double x, y; public: hipsolverDoubleComplex() = default; hipsolverDoubleComplex(double r, double i = 0) : x(r) , y(i) { } double real() const { return x; } double imag() const { return y; } void real(double r) { x = r; } void imag(double i) { y = i; } } hipsolverDoubleComplex; inline hipsolverComplex& operator+=(hipsolverComplex& lhs, const hipsolverComplex& rhs) { reinterpret_cast&>(lhs) += reinterpret_cast&>(rhs); return lhs; } inline hipsolverDoubleComplex& operator+=(hipsolverDoubleComplex& lhs, const hipsolverDoubleComplex& rhs) { reinterpret_cast&>(lhs) += reinterpret_cast&>(rhs); return lhs; } inline hipsolverComplex operator+(hipsolverComplex lhs, const hipsolverComplex& rhs) { return lhs += rhs; } inline hipsolverDoubleComplex operator+(hipsolverDoubleComplex lhs, const hipsolverDoubleComplex& rhs) { return lhs += rhs; } inline hipsolverComplex& operator-=(hipsolverComplex& lhs, const hipsolverComplex& rhs) { reinterpret_cast&>(lhs) -= reinterpret_cast&>(rhs); return lhs; } inline hipsolverDoubleComplex& operator-=(hipsolverDoubleComplex& lhs, const hipsolverDoubleComplex& rhs) { reinterpret_cast&>(lhs) -= reinterpret_cast&>(rhs); return lhs; } inline hipsolverComplex operator-(hipsolverComplex lhs, const hipsolverComplex& rhs) { 
return lhs -= rhs; } inline hipsolverDoubleComplex operator-(hipsolverDoubleComplex lhs, const hipsolverDoubleComplex& rhs) { return lhs -= rhs; } inline hipsolverComplex& operator*=(hipsolverComplex& lhs, const hipsolverComplex& rhs) { reinterpret_cast&>(lhs) *= reinterpret_cast&>(rhs); return lhs; } inline hipsolverDoubleComplex& operator*=(hipsolverDoubleComplex& lhs, const hipsolverDoubleComplex& rhs) { reinterpret_cast&>(lhs) *= reinterpret_cast&>(rhs); return lhs; } inline hipsolverComplex operator*(hipsolverComplex lhs, const hipsolverComplex& rhs) { return lhs *= rhs; } inline hipsolverDoubleComplex operator*(hipsolverDoubleComplex lhs, const hipsolverDoubleComplex& rhs) { return lhs *= rhs; } inline hipsolverComplex& operator/=(hipsolverComplex& lhs, const hipsolverComplex& rhs) { reinterpret_cast&>(lhs) /= reinterpret_cast&>(rhs); return lhs; } inline hipsolverDoubleComplex& operator/=(hipsolverDoubleComplex& lhs, const hipsolverDoubleComplex& rhs) { reinterpret_cast&>(lhs) /= reinterpret_cast&>(rhs); return lhs; } inline hipsolverComplex operator/(hipsolverComplex lhs, const hipsolverComplex& rhs) { return lhs /= rhs; } inline hipsolverDoubleComplex operator/(hipsolverDoubleComplex lhs, const hipsolverDoubleComplex& rhs) { return lhs /= rhs; } inline bool operator==(const hipsolverComplex& lhs, const hipsolverComplex& rhs) { return reinterpret_cast&>(lhs) == reinterpret_cast&>(rhs); } inline bool operator!=(const hipsolverComplex& lhs, const hipsolverComplex& rhs) { return !(lhs == rhs); } inline bool operator==(const hipsolverDoubleComplex& lhs, const hipsolverDoubleComplex& rhs) { return reinterpret_cast&>(lhs) == reinterpret_cast&>(rhs); } inline bool operator!=(const hipsolverDoubleComplex& lhs, const hipsolverDoubleComplex& rhs) { return !(lhs == rhs); } inline hipsolverComplex operator-(const hipsolverComplex& x) { return {-x.real(), -x.imag()}; } inline hipsolverDoubleComplex operator-(const hipsolverDoubleComplex& x) { return {-x.real(), 
-x.imag()}; } inline hipsolverComplex operator+(const hipsolverComplex& x) { return x; } inline hipsolverDoubleComplex operator+(const hipsolverDoubleComplex& x) { return x; } namespace std { inline float real(const hipsolverComplex& z) { return z.real(); } inline double real(const hipsolverDoubleComplex& z) { return z.real(); } inline float imag(const hipsolverComplex& z) { return z.imag(); } inline double imag(const hipsolverDoubleComplex& z) { return z.imag(); } inline hipsolverComplex conj(const hipsolverComplex& z) { return {z.real(), -z.imag()}; } inline hipsolverDoubleComplex conj(const hipsolverDoubleComplex& z) { return {z.real(), -z.imag()}; } inline float abs(const hipsolverComplex& z) { return abs(reinterpret_cast&>(z)); } inline double abs(const hipsolverDoubleComplex& z) { return abs(reinterpret_cast&>(z)); } inline float conj(const float& r) { return r; } inline double conj(const double& r) { return r; } } hipSOLVER-rocm-5.5.1/clients/include/hipsolver.hpp000066400000000000000000013772661436107207300220340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "hipsolver.h" #ifdef _WIN32 #include "hipsolver_no_fortran.hpp" #else #include "hipsolver_fortran.hpp" #endif // Most functions within this file exist to provide a consistent interface for our templated tests. // Function overloading is used to select between the float, double, rocblas_float_complex // and rocblas_double_complex variants, and to distinguish the batched case (T**) from the normal // and strided_batched cases (T*). // // The normal and strided_batched cases are distinguished from each other by passing a boolean // parameter, STRIDED. Variants such as the blocked and unblocked versions of algorithms, may be // provided in similar ways. 
typedef enum { API_NORMAL, API_FORTRAN, API_COMPAT } testAPI_t; typedef enum { C_NORMAL, C_NORMAL_ALT, C_STRIDED, C_STRIDED_ALT, FORTRAN_NORMAL, FORTRAN_NORMAL_ALT, FORTRAN_STRIDED, FORTRAN_STRIDED_ALT, COMPAT_NORMAL, COMPAT_NORMAL_ALT, COMPAT_STRIDED, COMPAT_STRIDED_ALT, INVALID_API_SPEC } testMarshal_t; inline testMarshal_t api2marshal(testAPI_t API, bool ALT) { switch(API) { case API_NORMAL: if(!ALT) return C_NORMAL; else return C_NORMAL_ALT; case API_FORTRAN: if(!ALT) return FORTRAN_NORMAL; else return FORTRAN_NORMAL_ALT; case API_COMPAT: if(!ALT) return COMPAT_NORMAL; else return COMPAT_NORMAL_ALT; default: return INVALID_API_SPEC; } } /******************** ORGBR/UNGBR ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_orgbr_ungbr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k, float* A, int lda, float* tau, int* lwork) { if(!FORTRAN) return hipsolverSorgbr_bufferSize(handle, side, m, n, k, A, lda, tau, lwork); else return hipsolverSorgbr_bufferSizeFortran(handle, side, m, n, k, A, lda, tau, lwork); } inline hipsolverStatus_t hipsolver_orgbr_ungbr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k, double* A, int lda, double* tau, int* lwork) { if(!FORTRAN) return hipsolverDorgbr_bufferSize(handle, side, m, n, k, A, lda, tau, lwork); else return hipsolverDorgbr_bufferSizeFortran(handle, side, m, n, k, A, lda, tau, lwork); } inline hipsolverStatus_t hipsolver_orgbr_ungbr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* tau, int* lwork) { if(!FORTRAN) return hipsolverCungbr_bufferSize( handle, side, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, lwork); else return hipsolverCungbr_bufferSizeFortran( handle, side, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, lwork); } inline hipsolverStatus_t 
hipsolver_orgbr_ungbr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, int* lwork) { if(!FORTRAN) return hipsolverZungbr_bufferSize( handle, side, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, lwork); else return hipsolverZungbr_bufferSizeFortran( handle, side, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, lwork); } inline hipsolverStatus_t hipsolver_orgbr_ungbr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k, float* A, int lda, float* tau, float* work, int lwork, int* info) { if(!FORTRAN) return hipsolverSorgbr(handle, side, m, n, k, A, lda, tau, work, lwork, info); else return hipsolverSorgbrFortran(handle, side, m, n, k, A, lda, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_orgbr_ungbr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k, double* A, int lda, double* tau, double* work, int lwork, int* info) { if(!FORTRAN) return hipsolverDorgbr(handle, side, m, n, k, A, lda, tau, work, lwork, info); else return hipsolverDorgbrFortran(handle, side, m, n, k, A, lda, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_orgbr_ungbr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* tau, hipsolverComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverCungbr(handle, side, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); else return hipsolverCungbrFortran(handle, side, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_orgbr_ungbr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, hipsolverDoubleComplex* work, int 
lwork, int* info) { if(!FORTRAN) return hipsolverZungbr(handle, side, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); else return hipsolverZungbrFortran(handle, side, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); } /******************** ORGQR/UNGQR ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_orgqr_ungqr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, int m, int n, int k, float* A, int lda, float* tau, int* lwork) { if(!FORTRAN) return hipsolverSorgqr_bufferSize(handle, m, n, k, A, lda, tau, lwork); else return hipsolverSorgqr_bufferSizeFortran(handle, m, n, k, A, lda, tau, lwork); } inline hipsolverStatus_t hipsolver_orgqr_ungqr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, int m, int n, int k, double* A, int lda, double* tau, int* lwork) { if(!FORTRAN) return hipsolverDorgqr_bufferSize(handle, m, n, k, A, lda, tau, lwork); else return hipsolverDorgqr_bufferSizeFortran(handle, m, n, k, A, lda, tau, lwork); } inline hipsolverStatus_t hipsolver_orgqr_ungqr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* tau, int* lwork) { if(!FORTRAN) return hipsolverCungqr_bufferSize( handle, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, lwork); else return hipsolverCungqr_bufferSizeFortran( handle, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, lwork); } inline hipsolverStatus_t hipsolver_orgqr_ungqr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, int* lwork) { if(!FORTRAN) return hipsolverZungqr_bufferSize( handle, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, lwork); else return hipsolverZungqr_bufferSizeFortran( handle, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, lwork); } inline hipsolverStatus_t 
hipsolver_orgqr_ungqr(bool FORTRAN, hipsolverHandle_t handle, int m, int n, int k, float* A, int lda, float* tau, float* work, int lwork, int* info) { if(!FORTRAN) return hipsolverSorgqr(handle, m, n, k, A, lda, tau, work, lwork, info); else return hipsolverSorgqrFortran(handle, m, n, k, A, lda, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_orgqr_ungqr(bool FORTRAN, hipsolverHandle_t handle, int m, int n, int k, double* A, int lda, double* tau, double* work, int lwork, int* info) { if(!FORTRAN) return hipsolverDorgqr(handle, m, n, k, A, lda, tau, work, lwork, info); else return hipsolverDorgqrFortran(handle, m, n, k, A, lda, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_orgqr_ungqr(bool FORTRAN, hipsolverHandle_t handle, int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* tau, hipsolverComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverCungqr(handle, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); else return hipsolverCungqrFortran(handle, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_orgqr_ungqr(bool FORTRAN, hipsolverHandle_t handle, int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, hipsolverDoubleComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverZungqr(handle, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); else return hipsolverZungqrFortran(handle, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ /******************** ORGTR/UNGTR ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_orgtr_ungtr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, float* tau, int* lwork) 
{ if(!FORTRAN) return hipsolverSorgtr_bufferSize(handle, uplo, n, A, lda, tau, lwork); else return hipsolverSorgtr_bufferSizeFortran(handle, uplo, n, A, lda, tau, lwork); } inline hipsolverStatus_t hipsolver_orgtr_ungtr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, double* tau, int* lwork) { if(!FORTRAN) return hipsolverDorgtr_bufferSize(handle, uplo, n, A, lda, tau, lwork); else return hipsolverDorgtr_bufferSizeFortran(handle, uplo, n, A, lda, tau, lwork); } inline hipsolverStatus_t hipsolver_orgtr_ungtr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, hipsolverComplex* tau, int* lwork) { if(!FORTRAN) return hipsolverCungtr_bufferSize( handle, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, lwork); else return hipsolverCungtr_bufferSizeFortran( handle, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, lwork); } inline hipsolverStatus_t hipsolver_orgtr_ungtr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, int* lwork) { if(!FORTRAN) return hipsolverZungtr_bufferSize( handle, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, lwork); else return hipsolverZungtr_bufferSizeFortran( handle, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, lwork); } inline hipsolverStatus_t hipsolver_orgtr_ungtr(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, float* tau, float* work, int lwork, int* info) { if(!FORTRAN) return hipsolverSorgtr(handle, uplo, n, A, lda, tau, work, lwork, info); else return hipsolverSorgtrFortran(handle, uplo, n, A, lda, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_orgtr_ungtr(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, double* tau, double* work, int lwork, int* info) { if(!FORTRAN) return 
hipsolverDorgtr(handle, uplo, n, A, lda, tau, work, lwork, info); else return hipsolverDorgtrFortran(handle, uplo, n, A, lda, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_orgtr_ungtr(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, hipsolverComplex* tau, hipsolverComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverCungtr(handle, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); else return hipsolverCungtrFortran(handle, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_orgtr_ungtr(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, hipsolverDoubleComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverZungtr(handle, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); else return hipsolverZungtrFortran(handle, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ /******************** ORMQR/UNMQR ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_ormqr_unmqr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, float* A, int lda, float* tau, float* C, int ldc, int* lwork) { if(!FORTRAN) return hipsolverSormqr_bufferSize(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); else return hipsolverSormqr_bufferSizeFortran( handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); } inline hipsolverStatus_t hipsolver_ormqr_unmqr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, double* A, int lda, double* tau, double* C, int 
ldc, int* lwork) { if(!FORTRAN) return hipsolverDormqr_bufferSize(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); else return hipsolverDormqr_bufferSizeFortran( handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); } inline hipsolverStatus_t hipsolver_ormqr_unmqr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* tau, hipsolverComplex* C, int ldc, int* lwork) { if(!FORTRAN) return hipsolverCunmqr_bufferSize(handle, side, trans, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)C, ldc, lwork); else return hipsolverCunmqr_bufferSizeFortran(handle, side, trans, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)C, ldc, lwork); } inline hipsolverStatus_t hipsolver_ormqr_unmqr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, hipsolverDoubleComplex* C, int ldc, int* lwork) { if(!FORTRAN) return hipsolverZunmqr_bufferSize(handle, side, trans, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)C, ldc, lwork); else return hipsolverZunmqr_bufferSizeFortran(handle, side, trans, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)C, ldc, lwork); } inline hipsolverStatus_t hipsolver_ormqr_unmqr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, float* A, int lda, float* tau, float* C, int ldc, float* work, int lwork, int* info) { if(!FORTRAN) return hipsolverSormqr( handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info); else return hipsolverSormqrFortran( handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info); } inline hipsolverStatus_t hipsolver_ormqr_unmqr(bool FORTRAN, hipsolverHandle_t handle, 
hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, double* A, int lda, double* tau, double* C, int ldc, double* work, int lwork, int* info) { if(!FORTRAN) return hipsolverDormqr( handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info); else return hipsolverDormqrFortran( handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info); } inline hipsolverStatus_t hipsolver_ormqr_unmqr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, hipsolverComplex* A, int lda, hipsolverComplex* tau, hipsolverComplex* C, int ldc, hipsolverComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverCunmqr(handle, side, trans, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)C, ldc, (hipFloatComplex*)work, lwork, info); else return hipsolverCunmqrFortran(handle, side, trans, m, n, k, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)C, ldc, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_ormqr_unmqr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans, int m, int n, int k, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, hipsolverDoubleComplex* C, int ldc, hipsolverDoubleComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverZunmqr(handle, side, trans, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)C, ldc, (hipDoubleComplex*)work, lwork, info); else return hipsolverZunmqrFortran(handle, side, trans, m, n, k, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)C, ldc, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ /******************** ORMTR/UNMTR ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_ormtr_unmtr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, 
hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, float* A, int lda, float* tau, float* C, int ldc, int* lwork) { if(!FORTRAN) return hipsolverSormtr_bufferSize( handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); else return hipsolverSormtr_bufferSizeFortran( handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); } inline hipsolverStatus_t hipsolver_ormtr_unmtr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, double* A, int lda, double* tau, double* C, int ldc, int* lwork) { if(!FORTRAN) return hipsolverDormtr_bufferSize( handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); else return hipsolverDormtr_bufferSizeFortran( handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); } inline hipsolverStatus_t hipsolver_ormtr_unmtr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, hipsolverComplex* A, int lda, hipsolverComplex* tau, hipsolverComplex* C, int ldc, int* lwork) { if(!FORTRAN) return hipsolverCunmtr_bufferSize(handle, side, uplo, trans, m, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)C, ldc, lwork); else return hipsolverCunmtr_bufferSizeFortran(handle, side, uplo, trans, m, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)C, ldc, lwork); } inline hipsolverStatus_t hipsolver_ormtr_unmtr_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, hipsolverDoubleComplex* C, int ldc, int* lwork) { if(!FORTRAN) return hipsolverZunmtr_bufferSize(handle, side, uplo, trans, m, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)C, ldc, lwork); else return hipsolverZunmtr_bufferSizeFortran(handle, side, uplo, trans, m, 
n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)C, ldc, lwork); } inline hipsolverStatus_t hipsolver_ormtr_unmtr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, float* A, int lda, float* tau, float* C, int ldc, float* work, int lwork, int* info) { if(!FORTRAN) return hipsolverSormtr( handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info); else return hipsolverSormtrFortran( handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info); } inline hipsolverStatus_t hipsolver_ormtr_unmtr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, double* A, int lda, double* tau, double* C, int ldc, double* work, int lwork, int* info) { if(!FORTRAN) return hipsolverDormtr( handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info); else return hipsolverDormtrFortran( handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info); } inline hipsolverStatus_t hipsolver_ormtr_unmtr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, hipsolverComplex* A, int lda, hipsolverComplex* tau, hipsolverComplex* C, int ldc, hipsolverComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverCunmtr(handle, side, uplo, trans, m, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)C, ldc, (hipFloatComplex*)work, lwork, info); else return hipsolverCunmtrFortran(handle, side, uplo, trans, m, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)C, ldc, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_ormtr_unmtr(bool FORTRAN, hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo, hipsolverOperation_t trans, int m, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* tau, 
hipsolverDoubleComplex* C, int ldc, hipsolverDoubleComplex* work, int lwork, int* info) { if(!FORTRAN) return hipsolverZunmtr(handle, side, uplo, trans, m, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)C, ldc, (hipDoubleComplex*)work, lwork, info); else return hipsolverZunmtrFortran(handle, side, uplo, trans, m, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)C, ldc, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ /******************** GEBRD ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_gebrd_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int m, int n, float* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverSgebrd_bufferSize(handle, m, n, lwork); else return hipsolverSgebrd_bufferSizeFortran(handle, m, n, lwork); } inline hipsolverStatus_t hipsolver_gebrd_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int m, int n, double* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverDgebrd_bufferSize(handle, m, n, lwork); else return hipsolverDgebrd_bufferSizeFortran(handle, m, n, lwork); } inline hipsolverStatus_t hipsolver_gebrd_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int m, int n, hipsolverComplex* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverCgebrd_bufferSize(handle, m, n, lwork); else return hipsolverCgebrd_bufferSizeFortran(handle, m, n, lwork); } inline hipsolverStatus_t hipsolver_gebrd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, int m, int n, hipsolverDoubleComplex* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverZgebrd_bufferSize(handle, m, n, lwork); else return hipsolverZgebrd_bufferSizeFortran(handle, m, n, lwork); } inline hipsolverStatus_t hipsolver_gebrd(bool FORTRAN, hipsolverHandle_t handle, int m, int n, float* A, int lda, int stA, float* D, int stD, float* E, int stE, float* tauq, int stQ, float* taup, int stP, float* work, int lwork, int* info, int bc) { 
if(!FORTRAN) return hipsolverSgebrd(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info); else return hipsolverSgebrdFortran(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info); } inline hipsolverStatus_t hipsolver_gebrd(bool FORTRAN, hipsolverHandle_t handle, int m, int n, double* A, int lda, int stA, double* D, int stD, double* E, int stE, double* tauq, int stQ, double* taup, int stP, double* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverDgebrd(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info); else return hipsolverDgebrdFortran(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info); } inline hipsolverStatus_t hipsolver_gebrd(bool FORTRAN, hipsolverHandle_t handle, int m, int n, hipsolverComplex* A, int lda, int stA, float* D, int stD, float* E, int stE, hipsolverComplex* tauq, int stQ, hipsolverComplex* taup, int stP, hipsolverComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverCgebrd(handle, m, n, (hipFloatComplex*)A, lda, D, E, (hipFloatComplex*)tauq, (hipFloatComplex*)taup, (hipFloatComplex*)work, lwork, info); else return hipsolverCgebrdFortran(handle, m, n, (hipFloatComplex*)A, lda, D, E, (hipFloatComplex*)tauq, (hipFloatComplex*)taup, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_gebrd(bool FORTRAN, hipsolverHandle_t handle, int m, int n, hipsolverDoubleComplex* A, int lda, int stA, double* D, int stD, double* E, int stE, hipsolverDoubleComplex* tauq, int stQ, hipsolverDoubleComplex* taup, int stP, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverZgebrd(handle, m, n, (hipDoubleComplex*)A, lda, D, E, (hipDoubleComplex*)tauq, (hipDoubleComplex*)taup, (hipDoubleComplex*)work, lwork, info); else return hipsolverZgebrdFortran(handle, m, n, (hipDoubleComplex*)A, lda, D, E, (hipDoubleComplex*)tauq, (hipDoubleComplex*)taup, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ 
/******************** GEQRF ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_gels_bufferSize(testAPI_t API, hipsolverHandle_t handle, int m, int n, int nrhs, float* A, int lda, float* B, int ldb, float* X, int ldx, size_t* lwork) { switch(API) { case API_NORMAL: return hipsolverSSgels_bufferSize(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork); case API_FORTRAN: return hipsolverSSgels_bufferSizeFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork); case API_COMPAT: return hipsolverDnSSgels_bufferSize( handle, m, n, nrhs, A, lda, B, ldb, X, ldx, nullptr, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gels_bufferSize(testAPI_t API, hipsolverHandle_t handle, int m, int n, int nrhs, double* A, int lda, double* B, int ldb, double* X, int ldx, size_t* lwork) { switch(API) { case API_NORMAL: return hipsolverDDgels_bufferSize(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork); case API_FORTRAN: return hipsolverDDgels_bufferSizeFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork); case API_COMPAT: return hipsolverDnDDgels_bufferSize( handle, m, n, nrhs, A, lda, B, ldb, X, ldx, nullptr, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gels_bufferSize(testAPI_t API, hipsolverHandle_t handle, int m, int n, int nrhs, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, hipsolverComplex* X, int ldx, size_t* lwork) { switch(API) { case API_NORMAL: return hipsolverCCgels_bufferSize(handle, m, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, lwork); case API_FORTRAN: return hipsolverCCgels_bufferSizeFortran(handle, m, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, lwork); case API_COMPAT: return hipsolverDnCCgels_bufferSize(handle, m, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, nullptr, 
lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gels_bufferSize(testAPI_t API, hipsolverHandle_t handle, int m, int n, int nrhs, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, hipsolverDoubleComplex* X, int ldx, size_t* lwork) { switch(API) { case API_NORMAL: return hipsolverZZgels_bufferSize(handle, m, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, lwork); case API_FORTRAN: return hipsolverZZgels_bufferSizeFortran(handle, m, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, lwork); case API_COMPAT: return hipsolverDnZZgels_bufferSize(handle, m, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, nullptr, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gels(testAPI_t API, bool INPLACE, hipsolverHandle_t handle, int m, int n, int nrhs, float* A, int lda, int stA, float* B, int ldb, int stB, float* X, int ldx, int stX, float* work, size_t lwork, int* niters, int* info, int bc) { switch(api2marshal(API, INPLACE)) { case C_NORMAL: return hipsolverSSgels( handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info); case C_NORMAL_ALT: return hipsolverSSgels( handle, m, n, nrhs, A, lda, B, ldb, B, ldb, work, lwork, niters, info); case FORTRAN_NORMAL: return hipsolverSSgelsFortran( handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info); case COMPAT_NORMAL: return hipsolverDnSSgels( handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gels(testAPI_t API, bool INPLACE, hipsolverHandle_t handle, int m, int n, int nrhs, double* A, int lda, int stA, double* B, int ldb, int stB, double* X, int ldx, int stX, double* work, size_t lwork, int* niters, int* info, int bc) { 
switch(api2marshal(API, INPLACE)) { case C_NORMAL: return hipsolverDDgels( handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info); case C_NORMAL_ALT: return hipsolverDDgels( handle, m, n, nrhs, A, lda, B, ldb, B, ldb, work, lwork, niters, info); case FORTRAN_NORMAL: return hipsolverDDgelsFortran( handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info); case COMPAT_NORMAL: return hipsolverDnDDgels( handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gels(testAPI_t API, bool INPLACE, hipsolverHandle_t handle, int m, int n, int nrhs, hipsolverComplex* A, int lda, int stA, hipsolverComplex* B, int ldb, int stB, hipsolverComplex* X, int ldx, int stX, hipsolverComplex* work, size_t lwork, int* niters, int* info, int bc) { switch(api2marshal(API, INPLACE)) { case C_NORMAL: return hipsolverCCgels(handle, m, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, work, lwork, niters, info); case C_NORMAL_ALT: return hipsolverCCgels(handle, m, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)B, ldb, work, lwork, niters, info); case FORTRAN_NORMAL: return hipsolverCCgelsFortran(handle, m, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, work, lwork, niters, info); case COMPAT_NORMAL: return hipsolverDnCCgels(handle, m, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, work, lwork, niters, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gels(testAPI_t API, bool INPLACE, hipsolverHandle_t handle, int m, int n, int nrhs, hipsolverDoubleComplex* A, int lda, int stA, hipsolverDoubleComplex* B, int ldb, int stB, hipsolverDoubleComplex* X, int ldx, int stX, hipsolverDoubleComplex* work, size_t lwork, int* niters, int* info, int bc) { switch(api2marshal(API, INPLACE)) { 
case C_NORMAL: return hipsolverZZgels(handle, m, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, work, lwork, niters, info); case C_NORMAL_ALT: return hipsolverZZgels(handle, m, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)B, ldb, work, lwork, niters, info); case FORTRAN_NORMAL: return hipsolverZZgelsFortran(handle, m, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, work, lwork, niters, info); case COMPAT_NORMAL: return hipsolverDnZZgels(handle, m, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, work, lwork, niters, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } /********************************************************/ /******************** GEQRF ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_geqrf_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int m, int n, float* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverSgeqrf_bufferSize(handle, m, n, A, lda, lwork); else return hipsolverSgeqrf_bufferSizeFortran(handle, m, n, A, lda, lwork); } inline hipsolverStatus_t hipsolver_geqrf_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int m, int n, double* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverDgeqrf_bufferSize(handle, m, n, A, lda, lwork); else return hipsolverDgeqrf_bufferSizeFortran(handle, m, n, A, lda, lwork); } inline hipsolverStatus_t hipsolver_geqrf_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int m, int n, hipsolverComplex* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverCgeqrf_bufferSize(handle, m, n, (hipFloatComplex*)A, lda, lwork); else return hipsolverCgeqrf_bufferSizeFortran(handle, m, n, (hipFloatComplex*)A, lda, lwork); } inline hipsolverStatus_t hipsolver_geqrf_bufferSize(bool FORTRAN, hipsolverHandle_t handle, int m, int n, hipsolverDoubleComplex* A, int lda, int* lwork) { if(!FORTRAN) return 
hipsolverZgeqrf_bufferSize(handle, m, n, (hipDoubleComplex*)A, lda, lwork); else return hipsolverZgeqrf_bufferSizeFortran(handle, m, n, (hipDoubleComplex*)A, lda, lwork); } inline hipsolverStatus_t hipsolver_geqrf(bool FORTRAN, hipsolverHandle_t handle, int m, int n, float* A, int lda, int stA, float* tau, int stT, float* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverSgeqrf(handle, m, n, A, lda, tau, work, lwork, info); else return hipsolverSgeqrfFortran(handle, m, n, A, lda, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_geqrf(bool FORTRAN, hipsolverHandle_t handle, int m, int n, double* A, int lda, int stA, double* tau, int stT, double* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverDgeqrf(handle, m, n, A, lda, tau, work, lwork, info); else return hipsolverDgeqrfFortran(handle, m, n, A, lda, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_geqrf(bool FORTRAN, hipsolverHandle_t handle, int m, int n, hipsolverComplex* A, int lda, int stA, hipsolverComplex* tau, int stT, hipsolverComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverCgeqrf(handle, m, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); else return hipsolverCgeqrfFortran(handle, m, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_geqrf(bool FORTRAN, hipsolverHandle_t handle, int m, int n, hipsolverDoubleComplex* A, int lda, int stA, hipsolverDoubleComplex* tau, int stT, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverZgeqrf(handle, m, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); else return hipsolverZgeqrfFortran(handle, m, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ /******************** GESV 
********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_gesv_bufferSize(testAPI_t API, hipsolverHandle_t handle, int n, int nrhs, float* A, int lda, int* ipiv, float* B, int ldb, float* X, int ldx, size_t* lwork) { switch(API) { case API_NORMAL: return hipsolverSSgesv_bufferSize(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork); case API_FORTRAN: return hipsolverSSgesv_bufferSizeFortran( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork); case API_COMPAT: return hipsolverDnSSgesv_bufferSize( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, nullptr, lwork); default: *lwork; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gesv_bufferSize(testAPI_t API, hipsolverHandle_t handle, int n, int nrhs, double* A, int lda, int* ipiv, double* B, int ldb, double* X, int ldx, size_t* lwork) { switch(API) { case API_NORMAL: return hipsolverDDgesv_bufferSize(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork); case API_FORTRAN: return hipsolverDDgesv_bufferSizeFortran( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork); case API_COMPAT: return hipsolverDnDDgesv_bufferSize( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, nullptr, lwork); default: *lwork; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gesv_bufferSize(testAPI_t API, hipsolverHandle_t handle, int n, int nrhs, hipsolverComplex* A, int lda, int* ipiv, hipsolverComplex* B, int ldb, hipsolverComplex* X, int ldx, size_t* lwork) { switch(API) { case API_NORMAL: return hipsolverCCgesv_bufferSize(handle, n, nrhs, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, lwork); case API_FORTRAN: return hipsolverCCgesv_bufferSizeFortran(handle, n, nrhs, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, lwork); case API_COMPAT: return hipsolverDnCCgesv_bufferSize(handle, n, nrhs, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, 
nullptr, lwork); default: *lwork; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gesv_bufferSize(testAPI_t API, hipsolverHandle_t handle, int n, int nrhs, hipsolverDoubleComplex* A, int lda, int* ipiv, hipsolverDoubleComplex* B, int ldb, hipsolverDoubleComplex* X, int ldx, size_t* lwork) { switch(API) { case API_NORMAL: return hipsolverZZgesv_bufferSize(handle, n, nrhs, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, lwork); case API_FORTRAN: return hipsolverZZgesv_bufferSizeFortran(handle, n, nrhs, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, lwork); case API_COMPAT: return hipsolverDnZZgesv_bufferSize(handle, n, nrhs, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, nullptr, lwork); default: *lwork; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gesv(testAPI_t API, bool INPLACE, hipsolverHandle_t handle, int n, int nrhs, float* A, int lda, int stA, int* ipiv, int stP, float* B, int ldb, int stB, float* X, int ldx, int stX, float* work, size_t lwork, int* niters, int* info, int bc) { switch(api2marshal(API, INPLACE)) { case C_NORMAL: return hipsolverSSgesv( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info); case C_NORMAL_ALT: return hipsolverSSgesv( handle, n, nrhs, A, lda, ipiv, B, ldb, B, ldb, work, lwork, niters, info); case FORTRAN_NORMAL: return hipsolverSSgesvFortran( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info); case COMPAT_NORMAL: return hipsolverDnSSgesv( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gesv(testAPI_t API, bool INPLACE, hipsolverHandle_t handle, int n, int nrhs, double* A, int lda, int stA, int* ipiv, int stP, double* B, int ldb, int stB, double* X, int ldx, int stX, double* work, size_t lwork, 
int* niters, int* info, int bc) { switch(api2marshal(API, INPLACE)) { case C_NORMAL: return hipsolverDDgesv( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info); case C_NORMAL_ALT: return hipsolverDDgesv( handle, n, nrhs, A, lda, ipiv, B, ldb, B, ldb, work, lwork, niters, info); case FORTRAN_NORMAL: return hipsolverDDgesvFortran( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info); case COMPAT_NORMAL: return hipsolverDnDDgesv( handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gesv(testAPI_t API, bool INPLACE, hipsolverHandle_t handle, int n, int nrhs, hipsolverComplex* A, int lda, int stA, int* ipiv, int stP, hipsolverComplex* B, int ldb, int stB, hipsolverComplex* X, int ldx, int stX, hipsolverComplex* work, size_t lwork, int* niters, int* info, int bc) { switch(api2marshal(API, INPLACE)) { case C_NORMAL: return hipsolverCCgesv(handle, n, nrhs, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, work, lwork, niters, info); case C_NORMAL_ALT: return hipsolverCCgesv(handle, n, nrhs, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)B, ldb, (hipFloatComplex*)B, ldb, work, lwork, niters, info); case FORTRAN_NORMAL: return hipsolverCCgesvFortran(handle, n, nrhs, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, work, lwork, niters, info); case COMPAT_NORMAL: return hipsolverDnCCgesv(handle, n, nrhs, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)B, ldb, (hipFloatComplex*)X, ldx, work, lwork, niters, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gesv(testAPI_t API, bool INPLACE, hipsolverHandle_t handle, int n, int nrhs, hipsolverDoubleComplex* A, int lda, int stA, int* ipiv, int stP, hipsolverDoubleComplex* B, int ldb, int stB, hipsolverDoubleComplex* X, int ldx, int stX, hipsolverDoubleComplex* work, 
size_t lwork, int* niters, int* info, int bc) { switch(api2marshal(API, INPLACE)) { case C_NORMAL: return hipsolverZZgesv(handle, n, nrhs, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, work, lwork, niters, info); case C_NORMAL_ALT: return hipsolverZZgesv(handle, n, nrhs, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)B, ldb, work, lwork, niters, info); case FORTRAN_NORMAL: return hipsolverZZgesvFortran(handle, n, nrhs, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, work, lwork, niters, info); case COMPAT_NORMAL: return hipsolverDnZZgesv(handle, n, nrhs, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)X, ldx, work, lwork, niters, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } /********************************************************/ /******************** GESVD ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_gesvd_bufferSize(testAPI_t API, hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, float* A, int lda, int* lwork) { switch(API) { case API_NORMAL: return hipsolverSgesvd_bufferSize(handle, jobu, jobv, m, n, lwork); case API_FORTRAN: return hipsolverSgesvd_bufferSizeFortran(handle, jobu, jobv, m, n, lwork); case API_COMPAT: return hipsolverDnSgesvd_bufferSize(handle, m, n, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_gesvd_bufferSize(testAPI_t API, hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, double* A, int lda, int* lwork) { switch(API) { case API_NORMAL: return hipsolverDgesvd_bufferSize(handle, jobu, jobv, m, n, lwork); case API_FORTRAN: return hipsolverDgesvd_bufferSizeFortran(handle, jobu, jobv, m, n, lwork); case API_COMPAT: return hipsolverDnDgesvd_bufferSize(handle, m, n, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } 
// GESVD workspace query, hipsolverComplex overload.
// API picks the C, Fortran, or compat (hipsolverDn*) entry point. A and lda are accepted but not
// forwarded, and the compat call takes no jobu/jobv. An unrecognized API writes 0 to *lwork and
// reports HIPSOLVER_STATUS_NOT_SUPPORTED.
inline hipsolverStatus_t hipsolver_gesvd_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n,
    hipsolverComplex* A, int lda, int* lwork)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverCgesvd_bufferSize(handle, jobu, jobv, m, n, lwork);
    case API_FORTRAN:
        return hipsolverCgesvd_bufferSizeFortran(handle, jobu, jobv, m, n, lwork);
    case API_COMPAT:
        return hipsolverDnCgesvd_bufferSize(handle, m, n, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVD workspace query, hipsolverDoubleComplex overload; same dispatch as the overload above.
inline hipsolverStatus_t hipsolver_gesvd_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n,
    hipsolverDoubleComplex* A, int lda, int* lwork)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverZgesvd_bufferSize(handle, jobu, jobv, m, n, lwork);
    case API_FORTRAN:
        return hipsolverZgesvd_bufferSizeFortran(handle, jobu, jobv, m, n, lwork);
    case API_COMPAT:
        return hipsolverDnZgesvd_bufferSize(handle, m, n, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVD dispatch, float overload.
// api2marshal(API, NRWK) selects the entry point; the C_NORMAL_ALT case repeats the C call with
// a null rwork. stA, stS, stU, stV, stRW and bc are not forwarded by any case. Any other
// marshalling value yields HIPSOLVER_STATUS_NOT_SUPPORTED.
inline hipsolverStatus_t hipsolver_gesvd(
    testAPI_t API, bool NRWK, hipsolverHandle_t handle, signed char jobu, signed char jobv, int m,
    int n, float* A, int lda, int stA, float* S, int stS, float* U, int ldu, int stU, float* V,
    int ldv, int stV, float* work, int lwork, float* rwork, int stRW, int* info, int bc)
{
    switch(api2marshal(API, NRWK))
    {
    case C_NORMAL:
        return hipsolverSgesvd(
            handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info);
    case C_NORMAL_ALT:
        return hipsolverSgesvd(
            handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, nullptr, info);
    case FORTRAN_NORMAL:
        return hipsolverSgesvdFortran(
            handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info);
    case COMPAT_NORMAL:
        return hipsolverDnSgesvd(
            handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVD dispatch, double overload; same case table as the float overload.
inline hipsolverStatus_t hipsolver_gesvd(
    testAPI_t API, bool NRWK, hipsolverHandle_t handle, signed char jobu, signed char jobv, int m,
    int n, double* A, int lda, int stA, double* S, int stS, double* U, int ldu, int stU, double* V,
    int ldv, int stV, double* work, int lwork, double* rwork, int stRW, int* info, int bc)
{
    switch(api2marshal(API, NRWK))
    {
    case C_NORMAL:
        return hipsolverDgesvd(
            handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info);
    case C_NORMAL_ALT:
        return hipsolverDgesvd(
            handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, nullptr, info);
    case FORTRAN_NORMAL:
        return hipsolverDgesvdFortran(
            handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info);
    case COMPAT_NORMAL:
        return hipsolverDnDgesvd(
            handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVD dispatch, hipsolverComplex overload.
// Matrix and workspace pointers are cast to hipFloatComplex* once up front; S and rwork stay float.
inline hipsolverStatus_t hipsolver_gesvd(
    testAPI_t API, bool NRWK, hipsolverHandle_t handle, signed char jobu, signed char jobv, int m,
    int n, hipsolverComplex* A, int lda, int stA, float* S, int stS, hipsolverComplex* U, int ldu,
    int stU, hipsolverComplex* V, int ldv, int stV, hipsolverComplex* work, int lwork,
    float* rwork, int stRW, int* info, int bc)
{
    hipFloatComplex* const cA    = (hipFloatComplex*)A;
    hipFloatComplex* const cU    = (hipFloatComplex*)U;
    hipFloatComplex* const cV    = (hipFloatComplex*)V;
    hipFloatComplex* const cWork = (hipFloatComplex*)work;

    switch(api2marshal(API, NRWK))
    {
    case C_NORMAL:
        return hipsolverCgesvd(
            handle, jobu, jobv, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, rwork, info);
    case C_NORMAL_ALT:
        return hipsolverCgesvd(
            handle, jobu, jobv, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, nullptr, info);
    case FORTRAN_NORMAL:
        return hipsolverCgesvdFortran(
            handle, jobu, jobv, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, rwork, info);
    case COMPAT_NORMAL:
        return hipsolverDnCgesvd(
            handle, jobu, jobv, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, rwork, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVD dispatch, hipsolverDoubleComplex overload.
// Matrix and workspace pointers are cast to hipDoubleComplex* once up front; S and rwork stay
// double.
inline hipsolverStatus_t hipsolver_gesvd(
    testAPI_t API, bool NRWK, hipsolverHandle_t handle, signed char jobu, signed char jobv, int m,
    int n, hipsolverDoubleComplex* A, int lda, int stA, double* S, int stS,
    hipsolverDoubleComplex* U, int ldu, int stU, hipsolverDoubleComplex* V, int ldv, int stV,
    hipsolverDoubleComplex* work, int lwork, double* rwork, int stRW, int* info, int bc)
{
    hipDoubleComplex* const cA    = (hipDoubleComplex*)A;
    hipDoubleComplex* const cU    = (hipDoubleComplex*)U;
    hipDoubleComplex* const cV    = (hipDoubleComplex*)V;
    hipDoubleComplex* const cWork = (hipDoubleComplex*)work;

    switch(api2marshal(API, NRWK))
    {
    case C_NORMAL:
        return hipsolverZgesvd(
            handle, jobu, jobv, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, rwork, info);
    case C_NORMAL_ALT:
        return hipsolverZgesvd(
            handle, jobu, jobv, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, nullptr, info);
    case FORTRAN_NORMAL:
        return hipsolverZgesvdFortran(
            handle, jobu, jobv, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, rwork, info);
    case COMPAT_NORMAL:
        return hipsolverDnZgesvd(
            handle, jobu, jobv, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, rwork, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}
/********************************************************/

/******************** GESVDJ ********************/
// GESVDJ workspace query, float overload.
// api2marshal(API, STRIDED) selects the entry point: the *_NORMAL cases call the single-matrix
// query (which takes econ but not bc) and the *_NORMAL_ALT cases call the ...Batched query
// (which takes bc but not econ). An unsupported combination writes 0 to *lwork, so a caller that
// ignores the status never sizes a buffer from an indeterminate value.
inline hipsolverStatus_t hipsolver_gesvdj_bufferSize(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ,
    int m, int n, float* A, int lda, float* S, float* U, int ldu, float* V, int ldv, int* lwork,
    hipsolverGesvdjInfo_t params, int bc)
{
    switch(api2marshal(API, STRIDED))
    {
    case C_NORMAL:
        return hipsolverSgesvdj_bufferSize(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params);
    case C_NORMAL_ALT:
        return hipsolverSgesvdjBatched_bufferSize(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, bc);
    case FORTRAN_NORMAL:
        return hipsolverSgesvdj_bufferSizeFortran(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params);
    case FORTRAN_NORMAL_ALT:
        return hipsolverSgesvdjBatched_bufferSizeFortran(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, bc);
    case COMPAT_NORMAL:
        return hipsolverDnSgesvdj_bufferSize(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params);
    case COMPAT_NORMAL_ALT:
        return hipsolverDnSgesvdjBatched_bufferSize(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, bc);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVDJ workspace query, double overload; same case table as the float overload.
inline hipsolverStatus_t hipsolver_gesvdj_bufferSize(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ,
    int m, int n, double* A, int lda, double* S, double* U, int ldu, double* V, int ldv,
    int* lwork, hipsolverGesvdjInfo_t params, int bc)
{
    switch(api2marshal(API, STRIDED))
    {
    case C_NORMAL:
        return hipsolverDgesvdj_bufferSize(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params);
    case C_NORMAL_ALT:
        return hipsolverDgesvdjBatched_bufferSize(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, bc);
    case FORTRAN_NORMAL:
        return hipsolverDgesvdj_bufferSizeFortran(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params);
    case FORTRAN_NORMAL_ALT:
        return hipsolverDgesvdjBatched_bufferSizeFortran(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, bc);
    case COMPAT_NORMAL:
        return hipsolverDnDgesvdj_bufferSize(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params);
    case COMPAT_NORMAL_ALT:
        return hipsolverDnDgesvdjBatched_bufferSize(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, bc);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVDJ workspace query, hipsolverComplex overload.
// A, U and V are cast to hipFloatComplex* once up front; S stays float.
inline hipsolverStatus_t hipsolver_gesvdj_bufferSize(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ,
    int m, int n, hipsolverComplex* A, int lda, float* S, hipsolverComplex* U, int ldu,
    hipsolverComplex* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params, int bc)
{
    hipFloatComplex* const cA = (hipFloatComplex*)A;
    hipFloatComplex* const cU = (hipFloatComplex*)U;
    hipFloatComplex* const cV = (hipFloatComplex*)V;

    switch(api2marshal(API, STRIDED))
    {
    case C_NORMAL:
        return hipsolverCgesvdj_bufferSize(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params);
    case C_NORMAL_ALT:
        return hipsolverCgesvdjBatched_bufferSize(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params, bc);
    case FORTRAN_NORMAL:
        return hipsolverCgesvdj_bufferSizeFortran(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params);
    case FORTRAN_NORMAL_ALT:
        return hipsolverCgesvdjBatched_bufferSizeFortran(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params, bc);
    case COMPAT_NORMAL:
        return hipsolverDnCgesvdj_bufferSize(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params);
    case COMPAT_NORMAL_ALT:
        return hipsolverDnCgesvdjBatched_bufferSize(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params, bc);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVDJ workspace query, hipsolverDoubleComplex overload.
// A, U and V are cast to hipDoubleComplex* once up front; S stays double.
inline hipsolverStatus_t hipsolver_gesvdj_bufferSize(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ,
    int m, int n, hipsolverDoubleComplex* A, int lda, double* S, hipsolverDoubleComplex* U,
    int ldu, hipsolverDoubleComplex* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params, int bc)
{
    hipDoubleComplex* const cA = (hipDoubleComplex*)A;
    hipDoubleComplex* const cU = (hipDoubleComplex*)U;
    hipDoubleComplex* const cV = (hipDoubleComplex*)V;

    switch(api2marshal(API, STRIDED))
    {
    case C_NORMAL:
        return hipsolverZgesvdj_bufferSize(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params);
    case C_NORMAL_ALT:
        return hipsolverZgesvdjBatched_bufferSize(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params, bc);
    case FORTRAN_NORMAL:
        return hipsolverZgesvdj_bufferSizeFortran(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params);
    case FORTRAN_NORMAL_ALT:
        return hipsolverZgesvdjBatched_bufferSizeFortran(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params, bc);
    case COMPAT_NORMAL:
        return hipsolverDnZgesvdj_bufferSize(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params);
    case COMPAT_NORMAL_ALT:
        return hipsolverDnZgesvdjBatched_bufferSize(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, lwork, params, bc);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVDJ dispatch, float overload.
// Same marshalling as the workspace query: *_NORMAL cases call the single-matrix routine (econ,
// no bc) and *_NORMAL_ALT cases call the ...Batched routine (bc, no econ). stA, stS, stU and stV
// are not forwarded by any case.
inline hipsolverStatus_t hipsolver_gesvdj(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ,
    int m, int n, float* A, int lda, int stA, float* S, int stS, float* U, int ldu, int stU,
    float* V, int ldv, int stV, float* work, int lwork, int* info, hipsolverGesvdjInfo_t params,
    int bc)
{
    switch(api2marshal(API, STRIDED))
    {
    case C_NORMAL:
        return hipsolverSgesvdj(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params);
    case C_NORMAL_ALT:
        return hipsolverSgesvdjBatched(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, bc);
    case FORTRAN_NORMAL:
        return hipsolverSgesvdjFortran(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params);
    case FORTRAN_NORMAL_ALT:
        return hipsolverSgesvdjBatchedFortran(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, bc);
    case COMPAT_NORMAL:
        return hipsolverDnSgesvdj(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params);
    case COMPAT_NORMAL_ALT:
        return hipsolverDnSgesvdjBatched(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, bc);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVDJ dispatch, double overload; same case table as the float overload.
inline hipsolverStatus_t hipsolver_gesvdj(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ,
    int m, int n, double* A, int lda, int stA, double* S, int stS, double* U, int ldu, int stU,
    double* V, int ldv, int stV, double* work, int lwork, int* info,
    hipsolverGesvdjInfo_t params, int bc)
{
    switch(api2marshal(API, STRIDED))
    {
    case C_NORMAL:
        return hipsolverDgesvdj(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params);
    case C_NORMAL_ALT:
        return hipsolverDgesvdjBatched(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, bc);
    case FORTRAN_NORMAL:
        return hipsolverDgesvdjFortran(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params);
    case FORTRAN_NORMAL_ALT:
        return hipsolverDgesvdjBatchedFortran(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, bc);
    case COMPAT_NORMAL:
        return hipsolverDnDgesvdj(
            handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params);
    case COMPAT_NORMAL_ALT:
        return hipsolverDnDgesvdjBatched(
            handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, bc);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVDJ dispatch, hipsolverComplex overload.
// A, U, V and work are cast to hipFloatComplex* once up front; S stays float.
inline hipsolverStatus_t hipsolver_gesvdj(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ,
    int m, int n, hipsolverComplex* A, int lda, int stA, float* S, int stS, hipsolverComplex* U,
    int ldu, int stU, hipsolverComplex* V, int ldv, int stV, hipsolverComplex* work, int lwork,
    int* info, hipsolverGesvdjInfo_t params, int bc)
{
    hipFloatComplex* const cA    = (hipFloatComplex*)A;
    hipFloatComplex* const cU    = (hipFloatComplex*)U;
    hipFloatComplex* const cV    = (hipFloatComplex*)V;
    hipFloatComplex* const cWork = (hipFloatComplex*)work;

    switch(api2marshal(API, STRIDED))
    {
    case C_NORMAL:
        return hipsolverCgesvdj(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params);
    case C_NORMAL_ALT:
        return hipsolverCgesvdjBatched(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params, bc);
    case FORTRAN_NORMAL:
        return hipsolverCgesvdjFortran(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params);
    case FORTRAN_NORMAL_ALT:
        return hipsolverCgesvdjBatchedFortran(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params, bc);
    case COMPAT_NORMAL:
        return hipsolverDnCgesvdj(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params);
    case COMPAT_NORMAL_ALT:
        return hipsolverDnCgesvdjBatched(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params, bc);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GESVDJ dispatch, hipsolverDoubleComplex overload.
// A, U, V and work are cast to hipDoubleComplex* once up front; S stays double.
inline hipsolverStatus_t hipsolver_gesvdj(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ,
    int m, int n, hipsolverDoubleComplex* A, int lda, int stA, double* S, int stS,
    hipsolverDoubleComplex* U, int ldu, int stU, hipsolverDoubleComplex* V, int ldv, int stV,
    hipsolverDoubleComplex* work, int lwork, int* info, hipsolverGesvdjInfo_t params, int bc)
{
    hipDoubleComplex* const cA    = (hipDoubleComplex*)A;
    hipDoubleComplex* const cU    = (hipDoubleComplex*)U;
    hipDoubleComplex* const cV    = (hipDoubleComplex*)V;
    hipDoubleComplex* const cWork = (hipDoubleComplex*)work;

    switch(api2marshal(API, STRIDED))
    {
    case C_NORMAL:
        return hipsolverZgesvdj(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params);
    case C_NORMAL_ALT:
        return hipsolverZgesvdjBatched(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params, bc);
    case FORTRAN_NORMAL:
        return hipsolverZgesvdjFortran(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params);
    case FORTRAN_NORMAL_ALT:
        return hipsolverZgesvdjBatchedFortran(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params, bc);
    case COMPAT_NORMAL:
        return hipsolverDnZgesvdj(
            handle, jobz, econ, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params);
    case COMPAT_NORMAL_ALT:
        return hipsolverDnZgesvdjBatched(
            handle, jobz, m, n, cA, lda, S, cU, ldu, cV, ldv, cWork, lwork, info, params, bc);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}
/********************************************************/

/******************** GESVDA ********************/
// GESVDA workspace query, float overload.
// Only the COMPAT_NORMAL_ALT marshalling is handled; it forwards every argument to the
// hipsolverDn...StridedBatched query. Any other combination writes 0 to *lwork, so a caller that
// ignores the status never sizes a buffer from an indeterminate value.
inline hipsolverStatus_t hipsolver_gesvda_bufferSize(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank,
    int m, int n, float* A, int lda, long long int stA, float* S, long long int stS, float* U,
    int ldu, long long int stU, float* V, int ldv, long long int stV, int* lwork, int bc)
{
    if(api2marshal(API, STRIDED) != COMPAT_NORMAL_ALT)
    {
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }

    return hipsolverDnSgesvdaStridedBatched_bufferSize(
        handle, jobz, rank, m, n, A, lda, stA, S, stS, U, ldu, stU, V, ldv, stV, lwork, bc);
}

// GESVDA workspace query, double overload; same handling as the float overload.
inline hipsolverStatus_t hipsolver_gesvda_bufferSize(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank,
    int m, int n, double* A, int lda, long long int stA, double* S, long long int stS, double* U,
    int ldu, long long int stU, double* V, int ldv, long long int stV, int* lwork, int bc)
{
    if(api2marshal(API, STRIDED) != COMPAT_NORMAL_ALT)
    {
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }

    return hipsolverDnDgesvdaStridedBatched_bufferSize(
        handle, jobz, rank, m, n, A, lda, stA, S, stS, U, ldu, stU, V, ldv, stV, lwork, bc);
}

// GESVDA workspace query, hipsolverComplex overload; A, U and V are passed as hipFloatComplex*.
inline hipsolverStatus_t hipsolver_gesvda_bufferSize(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank,
    int m, int n, hipsolverComplex* A, int lda, long long int stA, float* S, long long int stS,
    hipsolverComplex* U, int ldu, long long int stU, hipsolverComplex* V, int ldv,
    long long int stV, int* lwork, int bc)
{
    if(api2marshal(API, STRIDED) != COMPAT_NORMAL_ALT)
    {
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }

    return hipsolverDnCgesvdaStridedBatched_bufferSize(handle,
                                                       jobz,
                                                       rank,
                                                       m,
                                                       n,
                                                       (hipFloatComplex*)A,
                                                       lda,
                                                       stA,
                                                       S,
                                                       stS,
                                                       (hipFloatComplex*)U,
                                                       ldu,
                                                       stU,
                                                       (hipFloatComplex*)V,
                                                       ldv,
                                                       stV,
                                                       lwork,
                                                       bc);
}

// GESVDA workspace query, hipsolverDoubleComplex overload; A, U and V are passed as
// hipDoubleComplex*.
inline hipsolverStatus_t hipsolver_gesvda_bufferSize(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank,
    int m, int n, hipsolverDoubleComplex* A, int lda, long long int stA, double* S,
    long long int stS, hipsolverDoubleComplex* U, int ldu, long long int stU,
    hipsolverDoubleComplex* V, int ldv, long long int stV, int* lwork, int bc)
{
    if(api2marshal(API, STRIDED) != COMPAT_NORMAL_ALT)
    {
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }

    return hipsolverDnZgesvdaStridedBatched_bufferSize(handle,
                                                       jobz,
                                                       rank,
                                                       m,
                                                       n,
                                                       (hipDoubleComplex*)A,
                                                       lda,
                                                       stA,
                                                       S,
                                                       stS,
                                                       (hipDoubleComplex*)U,
                                                       ldu,
                                                       stU,
                                                       (hipDoubleComplex*)V,
                                                       ldv,
                                                       stV,
                                                       lwork,
                                                       bc);
}

// GESVDA dispatch, float overload.
// Only the COMPAT_NORMAL_ALT marshalling is handled; it forwards every argument (including the
// int strides and hRnrmF) to the hipsolverDn...StridedBatched routine.
inline hipsolverStatus_t hipsolver_gesvda(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank,
    int m, int n, float* A, int lda, int stA, float* S, int stS, float* U, int ldu, int stU,
    float* V, int ldv, int stV, float* work, int lwork, int* info, double* hRnrmF, int bc)
{
    if(api2marshal(API, STRIDED) != COMPAT_NORMAL_ALT)
        return HIPSOLVER_STATUS_NOT_SUPPORTED;

    return hipsolverDnSgesvdaStridedBatched(handle,
                                            jobz,
                                            rank,
                                            m,
                                            n,
                                            A,
                                            lda,
                                            stA,
                                            S,
                                            stS,
                                            U,
                                            ldu,
                                            stU,
                                            V,
                                            ldv,
                                            stV,
                                            work,
                                            lwork,
                                            info,
                                            hRnrmF,
                                            bc);
}

// GESVDA dispatch, double overload; same handling as the float overload.
inline hipsolverStatus_t hipsolver_gesvda(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank,
    int m, int n, double* A, int lda, int stA, double* S, int stS, double* U, int ldu, int stU,
    double* V, int ldv, int stV, double* work, int lwork, int* info, double* hRnrmF, int bc)
{
    if(api2marshal(API, STRIDED) != COMPAT_NORMAL_ALT)
        return HIPSOLVER_STATUS_NOT_SUPPORTED;

    return hipsolverDnDgesvdaStridedBatched(handle,
                                            jobz,
                                            rank,
                                            m,
                                            n,
                                            A,
                                            lda,
                                            stA,
                                            S,
                                            stS,
                                            U,
                                            ldu,
                                            stU,
                                            V,
                                            ldv,
                                            stV,
                                            work,
                                            lwork,
                                            info,
                                            hRnrmF,
                                            bc);
}

// GESVDA dispatch, hipsolverComplex overload; A, U, V and work are passed as hipFloatComplex*.
inline hipsolverStatus_t hipsolver_gesvda(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank,
    int m, int n, hipsolverComplex* A, int lda, int stA, float* S, int stS, hipsolverComplex* U,
    int ldu, int stU, hipsolverComplex* V, int ldv, int stV, hipsolverComplex* work, int lwork,
    int* info, double* hRnrmF, int bc)
{
    if(api2marshal(API, STRIDED) != COMPAT_NORMAL_ALT)
        return HIPSOLVER_STATUS_NOT_SUPPORTED;

    return hipsolverDnCgesvdaStridedBatched(handle,
                                            jobz,
                                            rank,
                                            m,
                                            n,
                                            (hipFloatComplex*)A,
                                            lda,
                                            stA,
                                            S,
                                            stS,
                                            (hipFloatComplex*)U,
                                            ldu,
                                            stU,
                                            (hipFloatComplex*)V,
                                            ldv,
                                            stV,
                                            (hipFloatComplex*)work,
                                            lwork,
                                            info,
                                            hRnrmF,
                                            bc);
}

// GESVDA dispatch, hipsolverDoubleComplex overload; A, U, V and work are passed as
// hipDoubleComplex*.
inline hipsolverStatus_t hipsolver_gesvda(
    testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank,
    int m, int n, hipsolverDoubleComplex* A, int lda, int stA, double* S, int stS,
    hipsolverDoubleComplex* U, int ldu, int stU, hipsolverDoubleComplex* V, int ldv, int stV,
    hipsolverDoubleComplex* work, int lwork, int* info, double* hRnrmF, int bc)
{
    if(api2marshal(API, STRIDED) != COMPAT_NORMAL_ALT)
        return HIPSOLVER_STATUS_NOT_SUPPORTED;

    return hipsolverDnZgesvdaStridedBatched(handle,
                                            jobz,
                                            rank,
                                            m,
                                            n,
                                            (hipDoubleComplex*)A,
                                            lda,
                                            stA,
                                            S,
                                            stS,
                                            (hipDoubleComplex*)U,
                                            ldu,
                                            stU,
                                            (hipDoubleComplex*)V,
                                            ldv,
                                            stV,
                                            (hipDoubleComplex*)work,
                                            lwork,
                                            info,
                                            hRnrmF,
                                            bc);
}
/********************************************************/

/******************** GETRF ********************/
// normal and strided_batched
// GETRF workspace query, float overload.
// API picks the C, Fortran, or compat (hipsolverDn*) query; all arguments are forwarded. An
// unrecognized API writes 0 to *lwork and reports HIPSOLVER_STATUS_NOT_SUPPORTED.
inline hipsolverStatus_t hipsolver_getrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, int m, int n, float* A, int lda, int* lwork)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverSgetrf_bufferSize(handle, m, n, A, lda, lwork);
    case API_FORTRAN:
        return hipsolverSgetrf_bufferSizeFortran(handle, m, n, A, lda, lwork);
    case API_COMPAT:
        return hipsolverDnSgetrf_bufferSize(handle, m, n, A, lda, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRF workspace query, double overload.
inline hipsolverStatus_t hipsolver_getrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, int m, int n, double* A, int lda, int* lwork)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverDgetrf_bufferSize(handle, m, n, A, lda, lwork);
    case API_FORTRAN:
        return hipsolverDgetrf_bufferSizeFortran(handle, m, n, A, lda, lwork);
    case API_COMPAT:
        return hipsolverDnDgetrf_bufferSize(handle, m, n, A, lda, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRF workspace query, hipsolverComplex overload; A is passed as hipFloatComplex*.
inline hipsolverStatus_t hipsolver_getrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, int m, int n, hipsolverComplex* A, int lda,
    int* lwork)
{
    hipFloatComplex* const cA = (hipFloatComplex*)A;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverCgetrf_bufferSize(handle, m, n, cA, lda, lwork);
    case API_FORTRAN:
        return hipsolverCgetrf_bufferSizeFortran(handle, m, n, cA, lda, lwork);
    case API_COMPAT:
        return hipsolverDnCgetrf_bufferSize(handle, m, n, cA, lda, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRF workspace query, hipsolverDoubleComplex overload; A is passed as hipDoubleComplex*.
inline hipsolverStatus_t hipsolver_getrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, int m, int n, hipsolverDoubleComplex* A, int lda,
    int* lwork)
{
    hipDoubleComplex* const cA = (hipDoubleComplex*)A;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverZgetrf_bufferSize(handle, m, n, cA, lda, lwork);
    case API_FORTRAN:
        return hipsolverZgetrf_bufferSizeFortran(handle, m, n, cA, lda, lwork);
    case API_COMPAT:
        return hipsolverDnZgetrf_bufferSize(handle, m, n, cA, lda, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRF dispatch, float overload.
// api2marshal(API, NPVT) selects the entry point; the C_NORMAL_ALT case repeats the C call with
// a null ipiv. The compat call takes no lwork. stA, stP and bc are not forwarded by any case.
inline hipsolverStatus_t hipsolver_getrf(
    testAPI_t API, bool NPVT, hipsolverHandle_t handle, int m, int n, float* A, int lda, int stA,
    float* work, int lwork, int* ipiv, int stP, int* info, int bc)
{
    switch(api2marshal(API, NPVT))
    {
    case C_NORMAL:
        return hipsolverSgetrf(handle, m, n, A, lda, work, lwork, ipiv, info);
    case C_NORMAL_ALT:
        return hipsolverSgetrf(handle, m, n, A, lda, work, lwork, nullptr, info);
    case FORTRAN_NORMAL:
        return hipsolverSgetrfFortran(handle, m, n, A, lda, work, lwork, ipiv, info);
    case COMPAT_NORMAL:
        return hipsolverDnSgetrf(handle, m, n, A, lda, work, ipiv, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRF dispatch, double overload; same case table as the float overload.
inline hipsolverStatus_t hipsolver_getrf(
    testAPI_t API, bool NPVT, hipsolverHandle_t handle, int m, int n, double* A, int lda, int stA,
    double* work, int lwork, int* ipiv, int stP, int* info, int bc)
{
    switch(api2marshal(API, NPVT))
    {
    case C_NORMAL:
        return hipsolverDgetrf(handle, m, n, A, lda, work, lwork, ipiv, info);
    case C_NORMAL_ALT:
        return hipsolverDgetrf(handle, m, n, A, lda, work, lwork, nullptr, info);
    case FORTRAN_NORMAL:
        return hipsolverDgetrfFortran(handle, m, n, A, lda, work, lwork, ipiv, info);
    case COMPAT_NORMAL:
        return hipsolverDnDgetrf(handle, m, n, A, lda, work, ipiv, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRF dispatch, hipsolverComplex overload; A and work are passed as hipFloatComplex*.
inline hipsolverStatus_t hipsolver_getrf(
    testAPI_t API, bool NPVT, hipsolverHandle_t handle, int m, int n, hipsolverComplex* A,
    int lda, int stA, hipsolverComplex* work, int lwork, int* ipiv, int stP, int* info, int bc)
{
    hipFloatComplex* const cA    = (hipFloatComplex*)A;
    hipFloatComplex* const cWork = (hipFloatComplex*)work;

    switch(api2marshal(API, NPVT))
    {
    case C_NORMAL:
        return hipsolverCgetrf(handle, m, n, cA, lda, cWork, lwork, ipiv, info);
    case C_NORMAL_ALT:
        return hipsolverCgetrf(handle, m, n, cA, lda, cWork, lwork, nullptr, info);
    case FORTRAN_NORMAL:
        return hipsolverCgetrfFortran(handle, m, n, cA, lda, cWork, lwork, ipiv, info);
    case COMPAT_NORMAL:
        return hipsolverDnCgetrf(handle, m, n, cA, lda, cWork, ipiv, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRF dispatch, hipsolverDoubleComplex overload; A and work are passed as hipDoubleComplex*.
inline hipsolverStatus_t hipsolver_getrf(
    testAPI_t API, bool NPVT, hipsolverHandle_t handle, int m, int n, hipsolverDoubleComplex* A,
    int lda, int stA, hipsolverDoubleComplex* work, int lwork, int* ipiv, int stP, int* info,
    int bc)
{
    hipDoubleComplex* const cA    = (hipDoubleComplex*)A;
    hipDoubleComplex* const cWork = (hipDoubleComplex*)work;

    switch(api2marshal(API, NPVT))
    {
    case C_NORMAL:
        return hipsolverZgetrf(handle, m, n, cA, lda, cWork, lwork, ipiv, info);
    case C_NORMAL_ALT:
        return hipsolverZgetrf(handle, m, n, cA, lda, cWork, lwork, nullptr, info);
    case FORTRAN_NORMAL:
        return hipsolverZgetrfFortran(handle, m, n, cA, lda, cWork, lwork, ipiv, info);
    case COMPAT_NORMAL:
        return hipsolverDnZgetrf(handle, m, n, cA, lda, cWork, ipiv, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}
/********************************************************/

/******************** GETRS ********************/
// normal and strided_batched
// GETRS workspace query, float overload.
// Only API_NORMAL and API_FORTRAN are handled (there is no compat case here); any other API
// writes 0 to *lwork and reports HIPSOLVER_STATUS_NOT_SUPPORTED.
inline hipsolverStatus_t hipsolver_getrs_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs,
    float* A, int lda, int* ipiv, float* B, int ldb, int* lwork)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverSgetrs_bufferSize(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork);
    case API_FORTRAN:
        return hipsolverSgetrs_bufferSizeFortran(
            handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRS workspace query, double overload.
inline hipsolverStatus_t hipsolver_getrs_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs,
    double* A, int lda, int* ipiv, double* B, int ldb, int* lwork)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverDgetrs_bufferSize(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork);
    case API_FORTRAN:
        return hipsolverDgetrs_bufferSizeFortran(
            handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRS workspace query, hipsolverComplex overload; A and B are passed as hipFloatComplex*.
inline hipsolverStatus_t hipsolver_getrs_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs,
    hipsolverComplex* A, int lda, int* ipiv, hipsolverComplex* B, int ldb, int* lwork)
{
    hipFloatComplex* const cA = (hipFloatComplex*)A;
    hipFloatComplex* const cB = (hipFloatComplex*)B;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverCgetrs_bufferSize(handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, lwork);
    case API_FORTRAN:
        return hipsolverCgetrs_bufferSizeFortran(
            handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRS workspace query, hipsolverDoubleComplex overload; A and B are passed as
// hipDoubleComplex*.
inline hipsolverStatus_t hipsolver_getrs_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs,
    hipsolverDoubleComplex* A, int lda, int* ipiv, hipsolverDoubleComplex* B, int ldb, int* lwork)
{
    hipDoubleComplex* const cA = (hipDoubleComplex*)A;
    hipDoubleComplex* const cB = (hipDoubleComplex*)B;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverZgetrs_bufferSize(handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, lwork);
    case API_FORTRAN:
        return hipsolverZgetrs_bufferSizeFortran(
            handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRS dispatch, float overload.
// API picks the C, Fortran, or compat (hipsolverDn*) routine; the compat call takes neither work
// nor lwork. stA, stP, stB and bc are not forwarded by any case.
inline hipsolverStatus_t hipsolver_getrs(
    testAPI_t API, hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs,
    float* A, int lda, int stA, int* ipiv, int stP, float* B, int ldb, int stB, float* work,
    int lwork, int* info, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverSgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info);
    case API_FORTRAN:
        return hipsolverSgetrsFortran(
            handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info);
    case API_COMPAT:
        return hipsolverDnSgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRS dispatch, double overload; same case table as the float overload.
inline hipsolverStatus_t hipsolver_getrs(
    testAPI_t API, hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs,
    double* A, int lda, int stA, int* ipiv, int stP, double* B, int ldb, int stB, double* work,
    int lwork, int* info, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverDgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info);
    case API_FORTRAN:
        return hipsolverDgetrsFortran(
            handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info);
    case API_COMPAT:
        return hipsolverDnDgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRS dispatch, hipsolverComplex overload; A, B and work are passed as hipFloatComplex*.
inline hipsolverStatus_t hipsolver_getrs(
    testAPI_t API, hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs,
    hipsolverComplex* A, int lda, int stA, int* ipiv, int stP, hipsolverComplex* B, int ldb,
    int stB, hipsolverComplex* work, int lwork, int* info, int bc)
{
    hipFloatComplex* const cA    = (hipFloatComplex*)A;
    hipFloatComplex* const cB    = (hipFloatComplex*)B;
    hipFloatComplex* const cWork = (hipFloatComplex*)work;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverCgetrs(
            handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, cWork, lwork, info);
    case API_FORTRAN:
        return hipsolverCgetrsFortran(
            handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, cWork, lwork, info);
    case API_COMPAT:
        return hipsolverDnCgetrs(handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// GETRS dispatch, hipsolverDoubleComplex overload; A, B and work are passed as
// hipDoubleComplex*.
inline hipsolverStatus_t hipsolver_getrs(
    testAPI_t API, hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs,
    hipsolverDoubleComplex* A, int lda, int stA, int* ipiv, int stP, hipsolverDoubleComplex* B,
    int ldb, int stB, hipsolverDoubleComplex* work, int lwork, int* info, int bc)
{
    hipDoubleComplex* const cA    = (hipDoubleComplex*)A;
    hipDoubleComplex* const cB    = (hipDoubleComplex*)B;
    hipDoubleComplex* const cWork = (hipDoubleComplex*)work;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverZgetrs(
            handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, cWork, lwork, info);
    case API_FORTRAN:
        return hipsolverZgetrsFortran(
            handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, cWork, lwork, info);
    case API_COMPAT:
        return hipsolverDnZgetrs(handle, trans, n, nrhs, cA, lda, ipiv, cB, ldb, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}
/********************************************************/

/******************** POTRF ********************/
// normal and strided_batched
// POTRF workspace query, float overload.
// API picks the C, Fortran, or compat (hipsolverDn*) query; bc is not forwarded. An unrecognized
// API writes 0 to *lwork and reports HIPSOLVER_STATUS_NOT_SUPPORTED.
inline hipsolverStatus_t hipsolver_potrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda,
    int* lwork, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverSpotrf_bufferSize(handle, uplo, n, A, lda, lwork);
    case API_FORTRAN:
        return hipsolverSpotrf_bufferSizeFortran(handle, uplo, n, A, lda, lwork);
    case API_COMPAT:
        return hipsolverDnSpotrf_bufferSize(handle, uplo, n, A, lda, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// POTRF workspace query, double overload.
inline hipsolverStatus_t hipsolver_potrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda,
    int* lwork, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverDpotrf_bufferSize(handle, uplo, n, A, lda, lwork);
    case API_FORTRAN:
        return hipsolverDpotrf_bufferSizeFortran(handle, uplo, n, A, lda, lwork);
    case API_COMPAT:
        return hipsolverDnDpotrf_bufferSize(handle, uplo, n, A, lda, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// POTRF workspace query, hipsolverComplex overload; A is passed as hipFloatComplex*.
inline hipsolverStatus_t hipsolver_potrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A,
    int lda, int* lwork, int bc)
{
    hipFloatComplex* const cA = (hipFloatComplex*)A;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverCpotrf_bufferSize(handle, uplo, n, cA, lda, lwork);
    case API_FORTRAN:
        return hipsolverCpotrf_bufferSizeFortran(handle, uplo, n, cA, lda, lwork);
    case API_COMPAT:
        return hipsolverDnCpotrf_bufferSize(handle, uplo, n, cA, lda, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// POTRF workspace query, hipsolverDoubleComplex overload; A is passed as hipDoubleComplex*.
inline hipsolverStatus_t hipsolver_potrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    hipsolverDoubleComplex* A, int lda, int* lwork, int bc)
{
    hipDoubleComplex* const cA = (hipDoubleComplex*)A;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverZpotrf_bufferSize(handle, uplo, n, cA, lda, lwork);
    case API_FORTRAN:
        return hipsolverZpotrf_bufferSizeFortran(handle, uplo, n, cA, lda, lwork);
    case API_COMPAT:
        return hipsolverDnZpotrf_bufferSize(handle, uplo, n, cA, lda, lwork);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// POTRF dispatch, float overload.
// API picks the C, Fortran, or compat (hipsolverDn*) routine, all with the same argument list.
// stA and bc are not forwarded by any case.
inline hipsolverStatus_t hipsolver_potrf(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda,
    int stA, float* work, int lwork, int* info, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverSpotrf(handle, uplo, n, A, lda, work, lwork, info);
    case API_FORTRAN:
        return hipsolverSpotrfFortran(handle, uplo, n, A, lda, work, lwork, info);
    case API_COMPAT:
        return hipsolverDnSpotrf(handle, uplo, n, A, lda, work, lwork, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// POTRF dispatch, double overload; same case table as the float overload.
inline hipsolverStatus_t hipsolver_potrf(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda,
    int stA, double* work, int lwork, int* info, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverDpotrf(handle, uplo, n, A, lda, work, lwork, info);
    case API_FORTRAN:
        return hipsolverDpotrfFortran(handle, uplo, n, A, lda, work, lwork, info);
    case API_COMPAT:
        return hipsolverDnDpotrf(handle, uplo, n, A, lda, work, lwork, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// POTRF dispatch, hipsolverComplex overload; A and work are passed as hipFloatComplex*.
inline hipsolverStatus_t hipsolver_potrf(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A,
    int lda, int stA, hipsolverComplex* work, int lwork, int* info, int bc)
{
    hipFloatComplex* const cA    = (hipFloatComplex*)A;
    hipFloatComplex* const cWork = (hipFloatComplex*)work;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverCpotrf(handle, uplo, n, cA, lda, cWork, lwork, info);
    case API_FORTRAN:
        return hipsolverCpotrfFortran(handle, uplo, n, cA, lda, cWork, lwork, info);
    case API_COMPAT:
        return hipsolverDnCpotrf(handle, uplo, n, cA, lda, cWork, lwork, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// POTRF dispatch, hipsolverDoubleComplex overload; A and work are passed as hipDoubleComplex*.
inline hipsolverStatus_t hipsolver_potrf(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    hipsolverDoubleComplex* A, int lda, int stA, hipsolverDoubleComplex* work, int lwork,
    int* info, int bc)
{
    hipDoubleComplex* const cA    = (hipDoubleComplex*)A;
    hipDoubleComplex* const cWork = (hipDoubleComplex*)work;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverZpotrf(handle, uplo, n, cA, lda, cWork, lwork, info);
    case API_FORTRAN:
        return hipsolverZpotrfFortran(handle, uplo, n, cA, lda, cWork, lwork, info);
    case API_COMPAT:
        return hipsolverDnZpotrf(handle, uplo, n, cA, lda, cWork, lwork, info);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// batched
// Batched POTRF workspace query, float overload (A is an array of matrix pointers).
// Only API_NORMAL and API_FORTRAN are handled; bc is forwarded. Any other API writes 0 to *lwork
// and reports HIPSOLVER_STATUS_NOT_SUPPORTED.
inline hipsolverStatus_t hipsolver_potrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A[], int lda,
    int* lwork, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverSpotrfBatched_bufferSize(handle, uplo, n, A, lda, lwork, bc);
    case API_FORTRAN:
        return hipsolverSpotrfBatched_bufferSizeFortran(handle, uplo, n, A, lda, lwork, bc);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// Batched POTRF workspace query, double overload.
inline hipsolverStatus_t hipsolver_potrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A[],
    int lda, int* lwork, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverDpotrfBatched_bufferSize(handle, uplo, n, A, lda, lwork, bc);
    case API_FORTRAN:
        return hipsolverDpotrfBatched_bufferSizeFortran(handle, uplo, n, A, lda, lwork, bc);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// Batched POTRF workspace query, hipsolverComplex overload; A is passed as hipFloatComplex**.
inline hipsolverStatus_t hipsolver_potrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    hipsolverComplex* A[], int lda, int* lwork, int bc)
{
    hipFloatComplex** const cA = (hipFloatComplex**)A;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverCpotrfBatched_bufferSize(handle, uplo, n, cA, lda, lwork, bc);
    case API_FORTRAN:
        return hipsolverCpotrfBatched_bufferSizeFortran(handle, uplo, n, cA, lda, lwork, bc);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// Batched POTRF workspace query, hipsolverDoubleComplex overload; A is passed as
// hipDoubleComplex**.
inline hipsolverStatus_t hipsolver_potrf_bufferSize(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    hipsolverDoubleComplex* A[], int lda, int* lwork, int bc)
{
    hipDoubleComplex** const cA = (hipDoubleComplex**)A;

    switch(API)
    {
    case API_NORMAL:
        return hipsolverZpotrfBatched_bufferSize(handle, uplo, n, cA, lda, lwork, bc);
    case API_FORTRAN:
        return hipsolverZpotrfBatched_bufferSizeFortran(handle, uplo, n, cA, lda, lwork, bc);
    default:
        *lwork = 0;
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// Batched POTRF dispatch, float overload.
// API picks the C, Fortran, or compat (hipsolverDn*) batched routine; the compat call takes
// neither work nor lwork. stA is not forwarded by any case.
inline hipsolverStatus_t hipsolver_potrf(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A[], int lda,
    int stA, float* work, int lwork, int* info, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverSpotrfBatched(handle, uplo, n, A, lda, work, lwork, info, bc);
    case API_FORTRAN:
        return hipsolverSpotrfBatchedFortran(handle, uplo, n, A, lda, work, lwork, info, bc);
    case API_COMPAT:
        return hipsolverDnSpotrfBatched(handle, uplo, n, A, lda, info, bc);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

// Batched POTRF dispatch, double overload; same case table as the float overload.
inline hipsolverStatus_t hipsolver_potrf(
    testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A[],
    int lda, int stA, double* work, int lwork, int* info, int bc)
{
    switch(API)
    {
    case API_NORMAL:
        return hipsolverDpotrfBatched(handle, uplo, n, A, lda, work, lwork, info, bc);
    case API_FORTRAN:
        return hipsolverDpotrfBatchedFortran(handle, uplo, n, A, lda, work, lwork, info, bc);
    case API_COMPAT:
        return hipsolverDnDpotrfBatched(handle, uplo, n, A, lda, info, bc);
    default:
        return HIPSOLVER_STATUS_NOT_SUPPORTED;
    }
}

inline hipsolverStatus_t
hipsolver_potrf(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A[], int lda, int stA, hipsolverComplex* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverCpotrfBatched( handle, uplo, n, (hipFloatComplex**)A, lda, (hipFloatComplex*)work, lwork, info, bc); case API_FORTRAN: return hipsolverCpotrfBatchedFortran( handle, uplo, n, (hipFloatComplex**)A, lda, (hipFloatComplex*)work, lwork, info, bc); case API_COMPAT: return hipsolverDnCpotrfBatched(handle, uplo, n, (hipFloatComplex**)A, lda, info, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrf(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A[], int lda, int stA, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverZpotrfBatched( handle, uplo, n, (hipDoubleComplex**)A, lda, (hipDoubleComplex*)work, lwork, info, bc); case API_FORTRAN: return hipsolverZpotrfBatchedFortran( handle, uplo, n, (hipDoubleComplex**)A, lda, (hipDoubleComplex*)work, lwork, info, bc); case API_COMPAT: return hipsolverDnZpotrfBatched(handle, uplo, n, (hipDoubleComplex**)A, lda, info, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } /********************************************************/ /******************** POTRI ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_potri_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverSpotri_bufferSize(handle, uplo, n, A, lda, lwork); else return hipsolverSpotri_bufferSizeFortran(handle, uplo, n, A, lda, lwork); } inline hipsolverStatus_t hipsolver_potri_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverDpotri_bufferSize(handle, uplo, n, A, lda, 
lwork); else return hipsolverDpotri_bufferSizeFortran(handle, uplo, n, A, lda, lwork); } inline hipsolverStatus_t hipsolver_potri_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverCpotri_bufferSize(handle, uplo, n, (hipFloatComplex*)A, lda, lwork); else return hipsolverCpotri_bufferSizeFortran(handle, uplo, n, (hipFloatComplex*)A, lda, lwork); } inline hipsolverStatus_t hipsolver_potri_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverZpotri_bufferSize(handle, uplo, n, (hipDoubleComplex*)A, lda, lwork); else return hipsolverZpotri_bufferSizeFortran(handle, uplo, n, (hipDoubleComplex*)A, lda, lwork); } inline hipsolverStatus_t hipsolver_potri(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, float* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverSpotri(handle, uplo, n, A, lda, work, lwork, info); else return hipsolverSpotriFortran(handle, uplo, n, A, lda, work, lwork, info); } inline hipsolverStatus_t hipsolver_potri(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, double* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverDpotri(handle, uplo, n, A, lda, work, lwork, info); else return hipsolverDpotriFortran(handle, uplo, n, A, lda, work, lwork, info); } inline hipsolverStatus_t hipsolver_potri(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, hipsolverComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverCpotri( handle, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)work, lwork, info); else return hipsolverCpotriFortran( handle, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)work, lwork, info); } inline 
hipsolverStatus_t hipsolver_potri(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int stA, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverZpotri( handle, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)work, lwork, info); else return hipsolverZpotriFortran( handle, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ /******************** POTRS ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_potrs_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, float* A, int lda, float* B, int ldb, int* lwork, int bc) { switch(API) { case API_NORMAL: return hipsolverSpotrs_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork); case API_FORTRAN: return hipsolverSpotrs_bufferSizeFortran(handle, uplo, n, nrhs, A, lda, B, ldb, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, double* A, int lda, double* B, int ldb, int* lwork, int bc) { switch(API) { case API_NORMAL: return hipsolverDpotrs_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork); case API_FORTRAN: return hipsolverDpotrs_bufferSizeFortran(handle, uplo, n, nrhs, A, lda, B, ldb, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, int* lwork, int bc) { switch(API) { case API_NORMAL: return hipsolverCpotrs_bufferSize( handle, uplo, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, lwork); case API_FORTRAN: return hipsolverCpotrs_bufferSizeFortran( handle, uplo, n, 
nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, int* lwork, int bc) { switch(API) { case API_NORMAL: return hipsolverZpotrs_bufferSize( handle, uplo, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, lwork); case API_FORTRAN: return hipsolverZpotrs_bufferSizeFortran( handle, uplo, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, lwork); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, float* A, int lda, int stA, float* B, int ldb, int stB, float* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverSpotrs(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info); case API_FORTRAN: return hipsolverSpotrsFortran(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info); case API_COMPAT: return hipsolverDnSpotrs(handle, uplo, n, nrhs, A, lda, B, ldb, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, double* A, int lda, int stA, double* B, int ldb, int stB, double* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverDpotrs(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info); case API_FORTRAN: return hipsolverDpotrsFortran(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info); case API_COMPAT: return hipsolverDnDpotrs(handle, uplo, n, nrhs, A, lda, B, ldb, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t 
uplo, int n, int nrhs, hipsolverComplex* A, int lda, int stA, hipsolverComplex* B, int ldb, int stB, hipsolverComplex* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverCpotrs(handle, uplo, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)work, lwork, info); case API_FORTRAN: return hipsolverCpotrsFortran(handle, uplo, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, (hipFloatComplex*)work, lwork, info); case API_COMPAT: return hipsolverDnCpotrs( handle, uplo, n, nrhs, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipsolverDoubleComplex* A, int lda, int stA, hipsolverDoubleComplex* B, int ldb, int stB, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverZpotrs(handle, uplo, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)work, lwork, info); case API_FORTRAN: return hipsolverZpotrsFortran(handle, uplo, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, (hipDoubleComplex*)work, lwork, info); case API_COMPAT: return hipsolverDnZpotrs( handle, uplo, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } // batched inline hipsolverStatus_t hipsolver_potrs_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, float* A[], int lda, float* B[], int ldb, int* lwork, int bc) { switch(API) { case API_NORMAL: return hipsolverSpotrsBatched_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork, bc); case API_FORTRAN: return hipsolverSpotrsBatched_bufferSizeFortran( handle, uplo, n, nrhs, A, lda, B, ldb, lwork, bc); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t 
hipsolver_potrs_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, double* A[], int lda, double* B[], int ldb, int* lwork, int bc) { switch(API) { case API_NORMAL: return hipsolverDpotrsBatched_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork, bc); case API_FORTRAN: return hipsolverDpotrsBatched_bufferSizeFortran( handle, uplo, n, nrhs, A, lda, B, ldb, lwork, bc); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipsolverComplex* A[], int lda, hipsolverComplex* B[], int ldb, int* lwork, int bc) { switch(API) { case API_NORMAL: return hipsolverCpotrsBatched_bufferSize( handle, uplo, n, nrhs, (hipFloatComplex**)A, lda, (hipFloatComplex**)B, ldb, lwork, bc); case API_FORTRAN: return hipsolverCpotrsBatched_bufferSizeFortran( handle, uplo, n, nrhs, (hipFloatComplex**)A, lda, (hipFloatComplex**)B, ldb, lwork, bc); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipsolverDoubleComplex* A[], int lda, hipsolverDoubleComplex* B[], int ldb, int* lwork, int bc) { switch(API) { case API_NORMAL: return hipsolverZpotrsBatched_bufferSize(handle, uplo, n, nrhs, (hipDoubleComplex**)A, lda, (hipDoubleComplex**)B, ldb, lwork, bc); case API_FORTRAN: return hipsolverZpotrsBatched_bufferSizeFortran(handle, uplo, n, nrhs, (hipDoubleComplex**)A, lda, (hipDoubleComplex**)B, ldb, lwork, bc); default: *lwork = 0; return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, float* A[], int lda, int stA, float* B[], int ldb, int stB, float* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return 
hipsolverSpotrsBatched(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, bc); case API_FORTRAN: return hipsolverSpotrsBatchedFortran( handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, bc); case API_COMPAT: return hipsolverDnSpotrsBatched(handle, uplo, n, nrhs, A, lda, B, ldb, info, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, double* A[], int lda, int stA, double* B[], int ldb, int stB, double* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverDpotrsBatched(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, bc); case API_FORTRAN: return hipsolverDpotrsBatchedFortran( handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, bc); case API_COMPAT: return hipsolverDnDpotrsBatched(handle, uplo, n, nrhs, A, lda, B, ldb, info, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipsolverComplex* A[], int lda, int stA, hipsolverComplex* B[], int ldb, int stB, hipsolverComplex* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverCpotrsBatched(handle, uplo, n, nrhs, (hipFloatComplex**)A, lda, (hipFloatComplex**)B, ldb, (hipFloatComplex*)work, lwork, info, bc); case API_FORTRAN: return hipsolverCpotrsBatchedFortran(handle, uplo, n, nrhs, (hipFloatComplex**)A, lda, (hipFloatComplex**)B, ldb, (hipFloatComplex*)work, lwork, info, bc); case API_COMPAT: return hipsolverDnCpotrsBatched( handle, uplo, n, nrhs, (hipFloatComplex**)A, lda, (hipFloatComplex**)B, ldb, info, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_potrs(testAPI_t API, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipsolverDoubleComplex* A[], int lda, int stA, hipsolverDoubleComplex* B[], int ldb, 
int stB, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { switch(API) { case API_NORMAL: return hipsolverZpotrsBatched(handle, uplo, n, nrhs, (hipDoubleComplex**)A, lda, (hipDoubleComplex**)B, ldb, (hipDoubleComplex*)work, lwork, info, bc); case API_FORTRAN: return hipsolverZpotrsBatchedFortran(handle, uplo, n, nrhs, (hipDoubleComplex**)A, lda, (hipDoubleComplex**)B, ldb, (hipDoubleComplex*)work, lwork, info, bc); case API_COMPAT: return hipsolverDnZpotrsBatched(handle, uplo, n, nrhs, (hipDoubleComplex**)A, lda, (hipDoubleComplex**)B, ldb, info, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } /********************************************************/ /******************** SYEVD/HEEVD ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_syevd_heevd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, int* lwork) { if(!FORTRAN) return hipsolverSsyevd_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork); else return hipsolverSsyevd_bufferSizeFortran(handle, jobz, uplo, n, A, lda, W, lwork); } inline hipsolverStatus_t hipsolver_syevd_heevd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, int* lwork) { if(!FORTRAN) return hipsolverDsyevd_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork); else return hipsolverDsyevd_bufferSizeFortran(handle, jobz, uplo, n, A, lda, W, lwork); } inline hipsolverStatus_t hipsolver_syevd_heevd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, float* W, int* lwork) { if(!FORTRAN) return hipsolverCheevd_bufferSize( handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, lwork); else return hipsolverCheevd_bufferSizeFortran( handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, lwork); } inline hipsolverStatus_t 
hipsolver_syevd_heevd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, double* W, int* lwork) { if(!FORTRAN) return hipsolverZheevd_bufferSize( handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, lwork); else return hipsolverZheevd_bufferSizeFortran( handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, lwork); } inline hipsolverStatus_t hipsolver_syevd_heevd(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, float* W, int stW, float* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverSsyevd(handle, jobz, uplo, n, A, lda, W, work, lwork, info); else return hipsolverSsyevdFortran(handle, jobz, uplo, n, A, lda, W, work, lwork, info); } inline hipsolverStatus_t hipsolver_syevd_heevd(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, double* W, int stW, double* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverDsyevd(handle, jobz, uplo, n, A, lda, W, work, lwork, info); else return hipsolverDsyevdFortran(handle, jobz, uplo, n, A, lda, W, work, lwork, info); } inline hipsolverStatus_t hipsolver_syevd_heevd(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, float* W, int stW, hipsolverComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverCheevd(handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, (hipFloatComplex*)work, lwork, info); else return hipsolverCheevdFortran(handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_syevd_heevd(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int stA, double* W, int stW, hipsolverDoubleComplex* 
work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverZheevd(handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, (hipDoubleComplex*)work, lwork, info); else return hipsolverZheevdFortran(handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ /******************** SYEVDX/HEEVDX ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_syevdx_heevdx_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, float* A, int lda, float vl, float vu, int il, int iu, int* nev, float* W, int* lwork) { switch(api2marshal(API, false)) { case COMPAT_NORMAL: return hipsolverDnSsyevdx_bufferSize( handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, nev, W, lwork); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevdx_heevdx_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, double* A, int lda, double vl, double vu, int il, int iu, int* nev, double* W, int* lwork) { switch(api2marshal(API, false)) { case COMPAT_NORMAL: return hipsolverDnDsyevdx_bufferSize( handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, nev, W, lwork); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevdx_heevdx_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, float vl, float vu, int il, int iu, int* nev, float* W, int* lwork) { switch(api2marshal(API, false)) { case COMPAT_NORMAL: return hipsolverDnCheevdx_bufferSize( handle, jobz, range, uplo, n, (hipFloatComplex*)A, lda, vl, vu, il, iu, nev, W, lwork); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t 
hipsolver_syevdx_heevdx_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, double vl, double vu, int il, int iu, int* nev, double* W, int* lwork) { switch(api2marshal(API, false)) { case COMPAT_NORMAL: return hipsolverDnZheevdx_bufferSize( handle, jobz, range, uplo, n, (hipDoubleComplex*)A, lda, vl, vu, il, iu, nev, W, lwork); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevdx_heevdx(testAPI_t API, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, float vl, float vu, int il, int iu, int* nev, float* W, int stW, float* work, int lwork, int* info, int bc) { switch(api2marshal(API, false)) { case COMPAT_NORMAL: return hipsolverDnSsyevdx( handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, nev, W, work, lwork, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevdx_heevdx(testAPI_t API, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, double vl, double vu, int il, int iu, int* nev, double* W, int stW, double* work, int lwork, int* info, int bc) { switch(api2marshal(API, false)) { case COMPAT_NORMAL: return hipsolverDnDsyevdx( handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, nev, W, work, lwork, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevdx_heevdx(testAPI_t API, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, float vl, float vu, int il, int iu, int* nev, float* W, int stW, hipsolverComplex* work, int lwork, int* info, int bc) { switch(api2marshal(API, false)) { case COMPAT_NORMAL: return hipsolverDnCheevdx(handle, jobz, range, 
uplo, n, (hipFloatComplex*)A, lda, vl, vu, il, iu, nev, W, (hipFloatComplex*)work, lwork, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevdx_heevdx(testAPI_t API, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int stA, double vl, double vu, int il, int iu, int* nev, double* W, int stW, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { switch(api2marshal(API, false)) { case COMPAT_NORMAL: return hipsolverDnZheevdx(handle, jobz, range, uplo, n, (hipDoubleComplex*)A, lda, vl, vu, il, iu, nev, W, (hipDoubleComplex*)work, lwork, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } /********************************************************/ /******************** SYEVJ/HEEVJ ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_syevj_heevj_bufferSize(testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, int* lwork, hipsolverSyevjInfo_t params, int bc) { switch(api2marshal(API, STRIDED)) { case C_NORMAL: return hipsolverSsyevj_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params); case C_NORMAL_ALT: return hipsolverSsyevjBatched_bufferSize( handle, jobz, uplo, n, A, lda, W, lwork, params, bc); case FORTRAN_NORMAL: return hipsolverSsyevj_bufferSizeFortran(handle, jobz, uplo, n, A, lda, W, lwork, params); case FORTRAN_NORMAL_ALT: return hipsolverSsyevjBatched_bufferSizeFortran( handle, jobz, uplo, n, A, lda, W, lwork, params, bc); case COMPAT_NORMAL: return hipsolverDnSsyevj_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params); case COMPAT_NORMAL_ALT: return hipsolverDnSsyevjBatched_bufferSize( handle, jobz, uplo, n, A, lda, W, lwork, params, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevj_heevj_bufferSize(testAPI_t API, bool 
STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, int* lwork, hipsolverSyevjInfo_t params, int bc) { switch(api2marshal(API, STRIDED)) { case C_NORMAL: return hipsolverDsyevj_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params); case C_NORMAL_ALT: return hipsolverDsyevjBatched_bufferSize( handle, jobz, uplo, n, A, lda, W, lwork, params, bc); case FORTRAN_NORMAL: return hipsolverDsyevj_bufferSizeFortran(handle, jobz, uplo, n, A, lda, W, lwork, params); case FORTRAN_NORMAL_ALT: return hipsolverDsyevjBatched_bufferSizeFortran( handle, jobz, uplo, n, A, lda, W, lwork, params, bc); case COMPAT_NORMAL: return hipsolverDnDsyevj_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params); case COMPAT_NORMAL_ALT: return hipsolverDnDsyevjBatched_bufferSize( handle, jobz, uplo, n, A, lda, W, lwork, params, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevj_heevj_bufferSize(testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, float* W, int* lwork, hipsolverSyevjInfo_t params, int bc) { switch(api2marshal(API, STRIDED)) { case C_NORMAL: return hipsolverCheevj_bufferSize( handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, lwork, params); case C_NORMAL_ALT: return hipsolverCheevjBatched_bufferSize( handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, lwork, params, bc); case FORTRAN_NORMAL: return hipsolverCheevj_bufferSizeFortran( handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, lwork, params); case FORTRAN_NORMAL_ALT: return hipsolverCheevjBatched_bufferSizeFortran( handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, lwork, params, bc); case COMPAT_NORMAL: return hipsolverDnCheevj_bufferSize( handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, lwork, params); case COMPAT_NORMAL_ALT: return hipsolverDnCheevjBatched_bufferSize( handle, jobz, uplo, n, 
(hipFloatComplex*)A, lda, W, lwork, params, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevj_heevj_bufferSize(testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, double* W, int* lwork, hipsolverSyevjInfo_t params, int bc) { switch(api2marshal(API, STRIDED)) { case C_NORMAL: return hipsolverZheevj_bufferSize( handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, lwork, params); case C_NORMAL_ALT: return hipsolverZheevjBatched_bufferSize( handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, lwork, params, bc); case FORTRAN_NORMAL: return hipsolverZheevj_bufferSizeFortran( handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, lwork, params); case FORTRAN_NORMAL_ALT: return hipsolverZheevjBatched_bufferSizeFortran( handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, lwork, params, bc); case COMPAT_NORMAL: return hipsolverDnZheevj_bufferSize( handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, lwork, params); case COMPAT_NORMAL_ALT: return hipsolverDnZheevjBatched_bufferSize( handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, lwork, params, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevj_heevj(testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, float* W, int stW, float* work, int lwork, int* info, hipsolverSyevjInfo_t params, int bc) { switch(api2marshal(API, STRIDED)) { case C_NORMAL: return hipsolverSsyevj(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); case C_NORMAL_ALT: return hipsolverSsyevjBatched( handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, bc); case FORTRAN_NORMAL: return hipsolverSsyevjFortran(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); case FORTRAN_NORMAL_ALT: return hipsolverSsyevjBatchedFortran( handle, jobz, uplo, n, A, lda, W, 
work, lwork, info, params, bc); case COMPAT_NORMAL: return hipsolverDnSsyevj(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); case COMPAT_NORMAL_ALT: return hipsolverDnSsyevjBatched( handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevj_heevj(testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, double* W, int stW, double* work, int lwork, int* info, hipsolverSyevjInfo_t params, int bc) { switch(api2marshal(API, STRIDED)) { case C_NORMAL: return hipsolverDsyevj(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); case C_NORMAL_ALT: return hipsolverDsyevjBatched( handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, bc); case FORTRAN_NORMAL: return hipsolverDsyevjFortran(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); case FORTRAN_NORMAL_ALT: return hipsolverDsyevjBatchedFortran( handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, bc); case COMPAT_NORMAL: return hipsolverDnDsyevj(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); case COMPAT_NORMAL_ALT: return hipsolverDnDsyevjBatched( handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevj_heevj(testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, float* W, int stW, hipsolverComplex* work, int lwork, int* info, hipsolverSyevjInfo_t params, int bc) { switch(api2marshal(API, STRIDED)) { case C_NORMAL: return hipsolverCheevj(handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, (hipFloatComplex*)work, lwork, info, params); case C_NORMAL_ALT: return hipsolverCheevjBatched(handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, (hipFloatComplex*)work, lwork, info, params, bc); 
case FORTRAN_NORMAL: return hipsolverCheevjFortran(handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, (hipFloatComplex*)work, lwork, info, params); case FORTRAN_NORMAL_ALT: return hipsolverCheevjBatchedFortran(handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, (hipFloatComplex*)work, lwork, info, params, bc); case COMPAT_NORMAL: return hipsolverDnCheevj(handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, (hipFloatComplex*)work, lwork, info, params); case COMPAT_NORMAL_ALT: return hipsolverDnCheevjBatched(handle, jobz, uplo, n, (hipFloatComplex*)A, lda, W, (hipFloatComplex*)work, lwork, info, params, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_syevj_heevj(testAPI_t API, bool STRIDED, hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int stA, double* W, int stW, hipsolverDoubleComplex* work, int lwork, int* info, hipsolverSyevjInfo_t params, int bc) { switch(api2marshal(API, STRIDED)) { case C_NORMAL: return hipsolverZheevj(handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, (hipDoubleComplex*)work, lwork, info, params); case C_NORMAL_ALT: return hipsolverZheevjBatched(handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, (hipDoubleComplex*)work, lwork, info, params, bc); case FORTRAN_NORMAL: return hipsolverZheevjFortran(handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, (hipDoubleComplex*)work, lwork, info, params); case FORTRAN_NORMAL_ALT: return hipsolverZheevjBatchedFortran(handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, (hipDoubleComplex*)work, lwork, info, params, bc); case COMPAT_NORMAL: return hipsolverDnZheevj(handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, (hipDoubleComplex*)work, lwork, info, params); case COMPAT_NORMAL_ALT: return hipsolverDnZheevjBatched(handle, jobz, uplo, n, (hipDoubleComplex*)A, lda, W, (hipDoubleComplex*)work, lwork, info, params, bc); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } 
/********************************************************/ /******************** SYGVD/HEGVD ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_sygvd_hegvd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float* W, int* lwork) { if(!FORTRAN) return hipsolverSsygvd_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); else return hipsolverSsygvd_bufferSizeFortran( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); } inline hipsolverStatus_t hipsolver_sygvd_hegvd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, double* W, int* lwork) { if(!FORTRAN) return hipsolverDsygvd_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); else return hipsolverDsygvd_bufferSizeFortran( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); } inline hipsolverStatus_t hipsolver_sygvd_hegvd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, float* W, int* lwork) { if(!FORTRAN) return hipsolverChegvd_bufferSize(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, lwork); else return hipsolverChegvd_bufferSizeFortran(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, lwork); } inline hipsolverStatus_t hipsolver_sygvd_hegvd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, double* W, int* lwork) { if(!FORTRAN) return hipsolverZhegvd_bufferSize(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, 
(hipDoubleComplex*)B, ldb, W, lwork); else return hipsolverZhegvd_bufferSizeFortran(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, lwork); } inline hipsolverStatus_t hipsolver_sygvd_hegvd(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, float* B, int ldb, int stB, float* W, int stW, float* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverSsygvd(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info); else return hipsolverSsygvdFortran( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info); } inline hipsolverStatus_t hipsolver_sygvd_hegvd(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, double* B, int ldb, int stB, double* W, int stW, double* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverDsygvd(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info); else return hipsolverDsygvdFortran( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info); } inline hipsolverStatus_t hipsolver_sygvd_hegvd(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, hipsolverComplex* B, int ldb, int stB, float* W, int stW, hipsolverComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverChegvd(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, (hipFloatComplex*)work, lwork, info); else return hipsolverChegvdFortran(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_sygvd_hegvd(bool FORTRAN, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, 
int n, hipsolverDoubleComplex* A, int lda, int stA, hipsolverDoubleComplex* B, int ldb, int stB, double* W, int stW, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverZhegvd(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, (hipDoubleComplex*)work, lwork, info); else return hipsolverZhegvdFortran(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ /******************** SYGVDX/HEGVDX ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_sygvdx_hegvdx_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float vl, float vu, int il, int iu, int* nev, float* W, int* lwork) { switch(API) { case API_COMPAT: return hipsolverDnSsygvdx_bufferSize( handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu, nev, W, lwork); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvdx_hegvdx_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, double vl, double vu, int il, int iu, int* nev, double* W, int* lwork) { switch(API) { case API_COMPAT: return hipsolverDnDsygvdx_bufferSize( handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu, nev, W, lwork); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvdx_hegvdx_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, float vl, float vu, 
int il, int iu, int* nev, float* W, int* lwork) { switch(API) { case API_COMPAT: return hipsolverDnChegvdx_bufferSize(handle, itype, jobz, range, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, vl, vu, il, iu, nev, W, lwork); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvdx_hegvdx_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, hipsolverDoubleComplex* B, int ldb, double vl, double vu, int il, int iu, int* nev, double* W, int* lwork) { switch(API) { case API_COMPAT: return hipsolverDnZhegvdx_bufferSize(handle, itype, jobz, range, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, vl, vu, il, iu, nev, W, lwork); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvdx_hegvdx(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, float* B, int ldb, int stB, float vl, float vu, int il, int iu, int* nev, float* W, int stW, float* work, int lwork, int* info, int bc) { switch(API) { case API_COMPAT: return hipsolverDnSsygvdx(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu, nev, W, work, lwork, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvdx_hegvdx(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, double* B, int ldb, int stB, double vl, double vu, int il, int iu, int* nev, double* W, int stW, double* work, int lwork, int* info, int bc) { switch(API) { case API_COMPAT: return hipsolverDnDsygvdx(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu, nev, W, work, lwork, info); default: 
return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvdx_hegvdx(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, hipsolverComplex* B, int ldb, int stB, float vl, float vu, int il, int iu, int* nev, float* W, int stW, hipsolverComplex* work, int lwork, int* info, int bc) { switch(API) { case API_COMPAT: return hipsolverDnChegvdx(handle, itype, jobz, range, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, vl, vu, il, iu, nev, W, (hipFloatComplex*)work, lwork, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvdx_hegvdx(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int stA, hipsolverDoubleComplex* B, int ldb, int stB, double vl, double vu, int il, int iu, int* nev, double* W, int stW, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { switch(API) { case API_COMPAT: return hipsolverDnZhegvdx(handle, itype, jobz, range, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, vl, vu, il, iu, nev, W, (hipDoubleComplex*)work, lwork, info); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } /********************************************************/ /******************** SYGVJ/HEGVJ ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_sygvj_hegvj_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float* W, int* lwork, hipsolverSyevjInfo_t params) { switch(API) { case API_NORMAL: return hipsolverSsygvj_bufferSize( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params); case API_FORTRAN: return hipsolverSsygvj_bufferSizeFortran( 
handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params); case API_COMPAT: return hipsolverDnSsygvj_bufferSize( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvj_hegvj_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, double* W, int* lwork, hipsolverSyevjInfo_t params) { switch(API) { case API_NORMAL: return hipsolverDsygvj_bufferSize( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params); case API_FORTRAN: return hipsolverDsygvj_bufferSizeFortran( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params); case API_COMPAT: return hipsolverDnDsygvj_bufferSize( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvj_hegvj_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, hipsolverComplex* B, int ldb, float* W, int* lwork, hipsolverSyevjInfo_t params) { switch(API) { case API_NORMAL: return hipsolverChegvj_bufferSize(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, lwork, params); case API_FORTRAN: return hipsolverChegvj_bufferSizeFortran(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, lwork, params); case API_COMPAT: return hipsolverDnChegvj_bufferSize(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, lwork, params); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvj_hegvj_bufferSize(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, 
hipsolverDoubleComplex* B, int ldb, double* W, int* lwork, hipsolverSyevjInfo_t params) { switch(API) { case API_NORMAL: return hipsolverZhegvj_bufferSize(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, lwork, params); case API_FORTRAN: return hipsolverZhegvj_bufferSizeFortran(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, lwork, params); case API_COMPAT: return hipsolverDnZhegvj_bufferSize(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, lwork, params); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvj_hegvj(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, float* B, int ldb, int stB, float* W, int stW, float* work, int lwork, int* info, hipsolverSyevjInfo_t params, int bc) { switch(API) { case API_NORMAL: return hipsolverSsygvj( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params); case API_FORTRAN: return hipsolverSsygvjFortran( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params); case API_COMPAT: return hipsolverDnSsygvj( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvj_hegvj(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, double* B, int ldb, int stB, double* W, int stW, double* work, int lwork, int* info, hipsolverSyevjInfo_t params, int bc) { switch(API) { case API_NORMAL: return hipsolverDsygvj( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params); case API_FORTRAN: return hipsolverDsygvjFortran( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params); case API_COMPAT: return 
hipsolverDnDsygvj( handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvj_hegvj(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, hipsolverComplex* B, int ldb, int stB, float* W, int stW, hipsolverComplex* work, int lwork, int* info, hipsolverSyevjInfo_t params, int bc) { switch(API) { case API_NORMAL: return hipsolverChegvj(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, (hipFloatComplex*)work, lwork, info, params); case API_FORTRAN: return hipsolverChegvjFortran(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, (hipFloatComplex*)work, lwork, info, params); case API_COMPAT: return hipsolverDnChegvj(handle, itype, jobz, uplo, n, (hipFloatComplex*)A, lda, (hipFloatComplex*)B, ldb, W, (hipFloatComplex*)work, lwork, info, params); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } inline hipsolverStatus_t hipsolver_sygvj_hegvj(testAPI_t API, hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int stA, hipsolverDoubleComplex* B, int ldb, int stB, double* W, int stW, hipsolverDoubleComplex* work, int lwork, int* info, hipsolverSyevjInfo_t params, int bc) { switch(API) { case API_NORMAL: return hipsolverZhegvj(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, (hipDoubleComplex*)work, lwork, info, params); case API_FORTRAN: return hipsolverZhegvjFortran(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, (hipDoubleComplex*)work, lwork, info, params); case API_COMPAT: return hipsolverDnZhegvj(handle, itype, jobz, uplo, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, W, (hipDoubleComplex*)work, lwork, 
info, params); default: return HIPSOLVER_STATUS_NOT_SUPPORTED; } } /********************************************************/ /******************** SYTRD/HETRD ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_sytrd_hetrd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, float* D, float* E, float* tau, int* lwork) { if(!FORTRAN) return hipsolverSsytrd_bufferSize(handle, uplo, n, A, lda, D, E, tau, lwork); else return hipsolverSsytrd_bufferSizeFortran(handle, uplo, n, A, lda, D, E, tau, lwork); } inline hipsolverStatus_t hipsolver_sytrd_hetrd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, double* D, double* E, double* tau, int* lwork) { if(!FORTRAN) return hipsolverDsytrd_bufferSize(handle, uplo, n, A, lda, D, E, tau, lwork); else return hipsolverDsytrd_bufferSizeFortran(handle, uplo, n, A, lda, D, E, tau, lwork); } inline hipsolverStatus_t hipsolver_sytrd_hetrd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, float* D, float* E, hipsolverComplex* tau, int* lwork) { if(!FORTRAN) return hipsolverChetrd_bufferSize( handle, uplo, n, (hipFloatComplex*)A, lda, D, E, (hipFloatComplex*)tau, lwork); else return hipsolverChetrd_bufferSizeFortran( handle, uplo, n, (hipFloatComplex*)A, lda, D, E, (hipFloatComplex*)tau, lwork); } inline hipsolverStatus_t hipsolver_sytrd_hetrd_bufferSize(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, double* D, double* E, hipsolverDoubleComplex* tau, int* lwork) { if(!FORTRAN) return hipsolverZhetrd_bufferSize( handle, uplo, n, (hipDoubleComplex*)A, lda, D, E, (hipDoubleComplex*)tau, lwork); else return hipsolverZhetrd_bufferSizeFortran( handle, uplo, n, (hipDoubleComplex*)A, lda, D, E, (hipDoubleComplex*)tau, lwork); } inline hipsolverStatus_t hipsolver_sytrd_hetrd(bool 
FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, float* D, int stD, float* E, int stE, float* tau, int stP, float* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverSsytrd(handle, uplo, n, A, lda, D, E, tau, work, lwork, info); else return hipsolverSsytrdFortran(handle, uplo, n, A, lda, D, E, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_sytrd_hetrd(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, double* D, int stD, double* E, int stE, double* tau, int stP, double* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverDsytrd(handle, uplo, n, A, lda, D, E, tau, work, lwork, info); else return hipsolverDsytrdFortran(handle, uplo, n, A, lda, D, E, tau, work, lwork, info); } inline hipsolverStatus_t hipsolver_sytrd_hetrd(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, float* D, int stD, float* E, int stE, hipsolverComplex* tau, int stP, hipsolverComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverChetrd(handle, uplo, n, (hipFloatComplex*)A, lda, D, E, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); else return hipsolverChetrdFortran(handle, uplo, n, (hipFloatComplex*)A, lda, D, E, (hipFloatComplex*)tau, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_sytrd_hetrd(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int stA, double* D, int stD, double* E, int stE, hipsolverDoubleComplex* tau, int stP, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverZhetrd(handle, uplo, n, (hipDoubleComplex*)A, lda, D, E, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, info); else return hipsolverZhetrdFortran(handle, uplo, n, (hipDoubleComplex*)A, lda, D, E, (hipDoubleComplex*)tau, (hipDoubleComplex*)work, lwork, 
info); } /********************************************************/ /******************** SYTRF ********************/ // normal and strided_batched inline hipsolverStatus_t hipsolver_sytrf_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int n, float* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverSsytrf_bufferSize(handle, n, A, lda, lwork); else return hipsolverSsytrf_bufferSizeFortran(handle, n, A, lda, lwork); } inline hipsolverStatus_t hipsolver_sytrf_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int n, double* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverDsytrf_bufferSize(handle, n, A, lda, lwork); else return hipsolverDsytrf_bufferSizeFortran(handle, n, A, lda, lwork); } inline hipsolverStatus_t hipsolver_sytrf_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int n, hipsolverComplex* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverCsytrf_bufferSize(handle, n, (hipFloatComplex*)A, lda, lwork); else return hipsolverCsytrf_bufferSizeFortran(handle, n, (hipFloatComplex*)A, lda, lwork); } inline hipsolverStatus_t hipsolver_sytrf_bufferSize( bool FORTRAN, hipsolverHandle_t handle, int n, hipsolverDoubleComplex* A, int lda, int* lwork) { if(!FORTRAN) return hipsolverZsytrf_bufferSize(handle, n, (hipDoubleComplex*)A, lda, lwork); else return hipsolverZsytrf_bufferSizeFortran(handle, n, (hipDoubleComplex*)A, lda, lwork); } inline hipsolverStatus_t hipsolver_sytrf(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, int stA, int* ipiv, int stP, float* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverSsytrf(handle, uplo, n, A, lda, ipiv, work, lwork, info); else return hipsolverSsytrfFortran(handle, uplo, n, A, lda, ipiv, work, lwork, info); } inline hipsolverStatus_t hipsolver_sytrf(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, int stA, int* ipiv, int stP, double* work, int lwork, int* info, int bc) { if(!FORTRAN) return 
hipsolverDsytrf(handle, uplo, n, A, lda, ipiv, work, lwork, info); else return hipsolverDsytrfFortran(handle, uplo, n, A, lda, ipiv, work, lwork, info); } inline hipsolverStatus_t hipsolver_sytrf(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverComplex* A, int lda, int stA, int* ipiv, int stP, hipsolverComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverCsytrf( handle, uplo, n, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)work, lwork, info); else return hipsolverCsytrfFortran( handle, uplo, n, (hipFloatComplex*)A, lda, ipiv, (hipFloatComplex*)work, lwork, info); } inline hipsolverStatus_t hipsolver_sytrf(bool FORTRAN, hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipsolverDoubleComplex* A, int lda, int stA, int* ipiv, int stP, hipsolverDoubleComplex* work, int lwork, int* info, int bc) { if(!FORTRAN) return hipsolverZsytrf( handle, uplo, n, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)work, lwork, info); else return hipsolverZsytrfFortran( handle, uplo, n, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)work, lwork, info); } /********************************************************/ hipSOLVER-rocm-5.5.1/clients/include/hipsolver_datatype2string.hpp000066400000000000000000000076741436107207300252300ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include #include #include "complex.hpp" #include "hipsolver.h" // Complex output inline std::ostream& operator<<(std::ostream& os, const hipsolverComplex& x) { os << "'(" << x.real() << "," << x.imag() << ")'"; return os; } inline std::ostream& operator<<(std::ostream& os, const hipsolverDoubleComplex& x) { os << "'(" << x.real() << "," << x.imag() << ")'"; return os; } /* ============================================================================================ */ /* Convert hipsolver constants to lapack char. 
*/ inline constexpr auto hipsolver2string_status(hipsolverStatus_t value) { switch(value) { case HIPSOLVER_STATUS_SUCCESS: return "HIPSOLVER_STATUS_SUCCESS"; case HIPSOLVER_STATUS_NOT_INITIALIZED: return "HIPSOLVER_STATUS_NOT_INITIALIZED"; case HIPSOLVER_STATUS_ALLOC_FAILED: return "HIPSOLVER_STATUS_ALLOC_FAILED"; case HIPSOLVER_STATUS_INVALID_VALUE: return "HIPSOLVER_STATUS_INVALID_VALUE"; case HIPSOLVER_STATUS_MAPPING_ERROR: return "HIPSOLVER_STATUS_MAPPING_ERROR"; case HIPSOLVER_STATUS_EXECUTION_FAILED: return "HIPSOLVER_STATUS_EXECUTION_FAILED"; case HIPSOLVER_STATUS_INTERNAL_ERROR: return "HIPSOLVER_STATUS_INTERNAL_ERROR"; case HIPSOLVER_STATUS_NOT_SUPPORTED: return "HIPSOLVER_STATUS_NOT_SUPPORTED"; case HIPSOLVER_STATUS_ARCH_MISMATCH: return "HIPSOLVER_STATUS_ARCH_MISMATCH"; case HIPSOLVER_STATUS_HANDLE_IS_NULLPTR: return "HIPSOLVER_STATUS_HANDLE_IS_NULLPTR"; case HIPSOLVER_STATUS_INVALID_ENUM: return "HIPSOLVER_STATUS_INVALID_ENUM"; case HIPSOLVER_STATUS_UNKNOWN: return "HIPSOLVER_STATUS_UNKNOWN"; default: throw std::invalid_argument("Invalid enum"); } } char hipsolver2char_operation(hipsolverOperation_t value); char hipsolver2char_fill(hipsolverFillMode_t value); char hipsolver2char_side(hipsolverSideMode_t value); char hipsolver2char_evect(hipsolverEigMode_t value); char hipsolver2char_eform(hipsolverEigType_t value); char hipsolver2char_erange(hipsolverEigRange_t value); /* ============================================================================================ */ /* Convert lapack char constants to hipsolver type. 
*/ hipsolverStatus_t string2hipsolver_status(const std::string& value); hipsolverOperation_t char2hipsolver_operation(char value); hipsolverFillMode_t char2hipsolver_fill(char value); hipsolverSideMode_t char2hipsolver_side(char value); hipsolverEigMode_t char2hipsolver_evect(char value); hipsolverEigType_t char2hipsolver_eform(char value); hipsolverEigRange_t char2hipsolver_erange(char value); hipSOLVER-rocm-5.5.1/clients/include/hipsolver_dispatcher.hpp000066400000000000000000000207021436107207300242150ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */

#pragma once

#include "../rocsolvercommon/rocsolver_arguments.hpp"
// NOTE(review): the next two #include directives have lost their header names in this
// copy (anything between angle brackets appears to have been stripped). The code below
// uses strcmp, std::map, std::string and std::invalid_argument, so the missing headers
// presumably provide some of those -- restore from upstream before compiling.
#include
#include

#include "testing_gebrd.hpp"
#include "testing_gels.hpp"
#include "testing_geqrf.hpp"
#include "testing_gesv.hpp"
#include "testing_gesvd.hpp"
#include "testing_gesvda.hpp"
#include "testing_gesvdj.hpp"
#include "testing_getrf.hpp"
#include "testing_getrs.hpp"
#include "testing_orgbr_ungbr.hpp"
#include "testing_orgqr_ungqr.hpp"
#include "testing_orgtr_ungtr.hpp"
#include "testing_ormqr_unmqr.hpp"
#include "testing_ormtr_unmtr.hpp"
#include "testing_potrf.hpp"
#include "testing_potri.hpp"
#include "testing_potrs.hpp"
#include "testing_syevd_heevd.hpp"
#include "testing_syevdx_heevdx.hpp"
#include "testing_syevj_heevj.hpp"
#include "testing_sygvd_hegvd.hpp"
#include "testing_sygvdx_hegvdx.hpp"
#include "testing_sygvj_hegvj.hpp"
#include "testing_sytrd_hetrd.hpp"
#include "testing_sytrf.hpp"

// Comparison functor for C strings: orders two const char* by the result of strcmp
// (i.e. by their characters) instead of by pointer value.
struct str_less
{
    bool operator()(const char* a, const char* b) const
    {
        return strcmp(a, b) < 0;
    }
};

// Map from const char* to function taking Arguments& using lexicographical comparison
// NOTE(review): the template argument list of std::map is missing in this copy;
// presumably it names the key type, the test-function type and str_less -- confirm
// against upstream.
using func_map = std::map;

// Function dispatcher for hipSOLVER tests.
// Looks a function name up in static name -> test-function tables and runs the match.
// HIPSOLVER_STATUS_INVALID_VALUE is used internally as the "name not found" signal.
class hipsolver_dispatcher
{
    // Looks `name` up among the functions available in all precisions and, if found,
    // calls the mapped test with `argus`.
    // Returns HIPSOLVER_STATUS_SUCCESS whenever the name is found (the outcome of the
    // test itself is not inspected here) and HIPSOLVER_STATUS_INVALID_VALUE otherwise.
    // NOTE(review): the template parameter list (and the template arguments of every
    // testing_* entry) is missing in this copy.
    template
    static hipsolverStatus_t run_function(const char* name, Arguments& argus)
    {
        // Map for functions that support all precisions
        static const func_map map = {
            {"gebrd", testing_gebrd},
            {"gels", testing_gels},
            {"geqrf", testing_geqrf},
            {"gesv", testing_gesv},
            {"gesvd", testing_gesvd},
            {"gesvda_strided_batched", testing_gesvda},
            {"gesvdj", testing_gesvdj},
            {"gesvdj_batched", testing_gesvdj},
            {"getrf", testing_getrf},
            {"getrs", testing_getrs},
            {"potrf", testing_potrf},
            {"potrf_batched", testing_potrf},
            {"potri", testing_potri},
            {"potrs", testing_potrs},
            {"potrs_batched", testing_potrs},
            {"sytrf", testing_sytrf},
        };

        // Grab function from the map and execute
        auto match = map.find(name);
        if(match != map.end())
        {
            match->second(argus);
            return HIPSOLVER_STATUS_SUCCESS;
        }
        else
            return HIPSOLVER_STATUS_INVALID_VALUE;
    }

    // Same lookup-and-run scheme as run_function, for the or*/sy* routine names.
    // NOTE(review): the template header is truncated in this copy; presumably it is an
    // enable_if-style constraint selecting this overload for real types -- confirm.
    template , int> = 0>
    static hipsolverStatus_t run_function_limited_precision(const char* name, Arguments& argus)
    {
        // Map for functions that support single and double precisions
        static const func_map map_real = {
            {"orgbr", testing_orgbr_ungbr},
            {"orgqr", testing_orgqr_ungqr},
            {"orgtr", testing_orgtr_ungtr},
            {"ormqr", testing_ormqr_unmqr},
            {"ormtr", testing_ormtr_unmtr},
            {"syevd", testing_syevd_heevd},
            {"syevdx", testing_syevdx_heevdx},
            {"syevj", testing_syevj_heevj},
            {"syevj_batched", testing_syevj_heevj},
            {"sygvd", testing_sygvd_hegvd},
            {"sygvdx", testing_sygvdx_hegvdx},
            {"sygvj", testing_sygvj_hegvj},
            {"sytrd", testing_sytrd_hetrd},
        };

        // Grab function from the map and execute
        auto match = map_real.find(name);
        if(match != map_real.end())
        {
            match->second(argus);
            return HIPSOLVER_STATUS_SUCCESS;
        }
        else
            return HIPSOLVER_STATUS_INVALID_VALUE;
    }

    // Same lookup-and-run scheme as run_function, for the un*/he* routine names.
    // NOTE(review): the template header is truncated in this copy; presumably it is the
    // complex-type counterpart of the overload above -- confirm.
    template , int> = 0>
    static hipsolverStatus_t run_function_limited_precision(const char* name, Arguments& argus)
    {
        // Map for functions that support single complex and double complex precisions
        static const func_map map_complex = {
            {"ungbr", testing_orgbr_ungbr},
            {"ungqr", testing_orgqr_ungqr},
            {"ungtr", testing_orgtr_ungtr},
            {"unmqr", testing_ormqr_unmqr},
            {"unmtr", testing_ormtr_unmtr},
            {"heevd", testing_syevd_heevd},
            {"heevdx", testing_syevdx_heevdx},
            {"heevj", testing_syevj_heevj},
            {"heevj_batched", testing_syevj_heevj},
            {"hegvd", testing_sygvd_hegvd},
            {"hegvdx", testing_sygvdx_hegvdx},
            {"hegvj", testing_sygvj_hegvj},
            {"hetrd", testing_sytrd_hetrd},
        };

        // Grab function from the map and execute
        auto match = map_complex.find(name);
        if(match != map_complex.end())
        {
            match->second(argus);
            return HIPSOLVER_STATUS_SUCCESS;
        }
        else
            return HIPSOLVER_STATUS_INVALID_VALUE;
    }

public:
    // Entry point: runs the test registered under `name` for the given precision
    // character ('s', 'd', 'c' or 'z').
    // Throws std::invalid_argument when `precision` is none of those characters, or
    // when `name` is found in neither the all-precision table nor the limited-precision
    // table consulted for that precision.
    // NOTE(review): the explicit template arguments on the run_function* calls are
    // missing in this copy, which is why the four branches look identical.
    static void invoke(const std::string& name, char precision, Arguments& argus)
    {
        hipsolverStatus_t status;

        // First attempt: functions available in every precision.
        if(precision == 's')
            status = run_function(name.c_str(), argus);
        else if(precision == 'd')
            status = run_function(name.c_str(), argus);
        else if(precision == 'c')
            status = run_function(name.c_str(), argus);
        else if(precision == 'z')
            status = run_function(name.c_str(), argus);
        else
            throw std::invalid_argument("Invalid value for --precision");

        // Not found above: fall back to the precision-limited tables.
        if(status == HIPSOLVER_STATUS_INVALID_VALUE)
        {
            if(precision == 's')
                status = run_function_limited_precision(name.c_str(), argus);
            else if(precision == 'd')
                status = run_function_limited_precision(name.c_str(), argus);
            else if(precision == 'c')
                status = run_function_limited_precision(name.c_str(), argus);
            else if(precision == 'z')
                status = run_function_limited_precision(name.c_str(), argus);
        }

        // Still not found: the function/precision pair is not supported.
        if(status == HIPSOLVER_STATUS_INVALID_VALUE)
        {
            std::string msg = "Invalid combination --function ";
            msg += name;
            msg += " --precision ";
            msg += precision;
            throw std::invalid_argument(msg);
        }
    }
};
hipSOLVER-rocm-5.5.1/clients/include/hipsolver_fortran.f90000066400000000000000000005256511436107207300233660ustar00rootroot00000000000000!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
!
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in
! all copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
!
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
! THE SOFTWARE.
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

module hipsolver_interface
    use iso_c_binding
    use hipsolver

contains

    !------------!
    !   LAPACK   !
    !------------!

    ! ******************** ORGBR/UNGBR ********************
    ! Each function below is exported under its own name via bind(c), receives all of
    ! its arguments by value (pointers as opaque c_ptr), hands them unchanged to the
    ! hipsolver routine of the same name without the "Fortran" suffix, and returns
    ! that routine's result.

    ! Pass-through to hipsolverSorgbr_bufferSize.
    function hipsolverSorgbr_bufferSizeFortran(handle, side, m, n, k, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverSorgbr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(c_int), value :: m, n, k, lda
        integer(c_int) :: res

        res = hipsolverSorgbr_bufferSize(handle, side, m, n, k, A, lda, tau, lwork)
    end function hipsolverSorgbr_bufferSizeFortran

    ! Pass-through to hipsolverDorgbr_bufferSize.
    function hipsolverDorgbr_bufferSizeFortran(handle, side, m, n, k, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverDorgbr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(c_int), value :: m, n, k, lda
        integer(c_int) :: res

        res = hipsolverDorgbr_bufferSize(handle, side, m, n, k, A, lda, tau, lwork)
    end function hipsolverDorgbr_bufferSizeFortran

    ! Pass-through to hipsolverCungbr_bufferSize.
    function hipsolverCungbr_bufferSizeFortran(handle, side, m, n, k, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverCungbr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(c_int), value :: m, n, k, lda
        integer(c_int) :: res

        res = hipsolverCungbr_bufferSize(handle, side, m, n, k, A, lda, tau, lwork)
    end function hipsolverCungbr_bufferSizeFortran

    ! Pass-through to hipsolverZungbr_bufferSize.
    function hipsolverZungbr_bufferSizeFortran(handle, side, m, n, k, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverZungbr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(c_int), value :: m, n, k, lda
        integer(c_int) :: res

        res = hipsolverZungbr_bufferSize(handle, side, m, n, k, A, lda, tau, lwork)
    end function hipsolverZungbr_bufferSizeFortran

    ! Pass-through to hipsolverSorgbr.
    function hipsolverSorgbrFortran(handle, side, m, n, k, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverSorgbrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(c_int), value :: m, n, k, lda, lwork
        integer(c_int) :: res

        res = hipsolverSorgbr(handle, side, m, n, k, A, lda, tau, work, lwork, info)
    end function hipsolverSorgbrFortran

    ! Pass-through to hipsolverDorgbr.
    function hipsolverDorgbrFortran(handle, side, m, n, k, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverDorgbrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(c_int), value :: m, n, k, lda, lwork
        integer(c_int) :: res

        res = hipsolverDorgbr(handle, side, m, n, k, A, lda, tau, work, lwork, info)
    end function hipsolverDorgbrFortran

    ! Pass-through to hipsolverCungbr.
    function hipsolverCungbrFortran(handle, side, m, n, k, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverCungbrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(c_int), value :: m, n, k, lda, lwork
        integer(c_int) :: res

        res = hipsolverCungbr(handle, side, m, n, k, A, lda, tau, work, lwork, info)
    end function hipsolverCungbrFortran

    ! Pass-through to hipsolverZungbr.
    function hipsolverZungbrFortran(handle, side, m, n, k, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverZungbrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(c_int), value :: m, n, k, lda, lwork
        integer(c_int) :: res

        res = hipsolverZungbr(handle, side, m, n, k, A, lda, tau, work, lwork, info)
    end function hipsolverZungbrFortran

    !
******************** ORGQR/UNGQR ********************
    ! Each function in this section is exported under its own name via bind(c),
    ! receives all of its arguments by value (pointers as opaque c_ptr), hands them
    ! unchanged to the hipsolver routine of the same name without the "Fortran"
    ! suffix, and returns that routine's result.

    ! Pass-through to hipsolverSorgqr_bufferSize.
    function hipsolverSorgqr_bufferSizeFortran(handle, m, n, k, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverSorgqr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(c_int), value :: m, n, k, lda
        integer(c_int) :: res

        res = hipsolverSorgqr_bufferSize(handle, m, n, k, A, lda, tau, lwork)
    end function hipsolverSorgqr_bufferSizeFortran

    ! Pass-through to hipsolverDorgqr_bufferSize.
    function hipsolverDorgqr_bufferSizeFortran(handle, m, n, k, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverDorgqr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(c_int), value :: m, n, k, lda
        integer(c_int) :: res

        res = hipsolverDorgqr_bufferSize(handle, m, n, k, A, lda, tau, lwork)
    end function hipsolverDorgqr_bufferSizeFortran

    ! Pass-through to hipsolverCungqr_bufferSize.
    function hipsolverCungqr_bufferSizeFortran(handle, m, n, k, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverCungqr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(c_int), value :: m, n, k, lda
        integer(c_int) :: res

        res = hipsolverCungqr_bufferSize(handle, m, n, k, A, lda, tau, lwork)
    end function hipsolverCungqr_bufferSizeFortran

    ! Pass-through to hipsolverZungqr_bufferSize.
    function hipsolverZungqr_bufferSizeFortran(handle, m, n, k, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverZungqr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(c_int), value :: m, n, k, lda
        integer(c_int) :: res

        res = hipsolverZungqr_bufferSize(handle, m, n, k, A, lda, tau, lwork)
    end function hipsolverZungqr_bufferSizeFortran

    ! Pass-through to hipsolverSorgqr.
    function hipsolverSorgqrFortran(handle, m, n, k, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverSorgqrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(c_int), value :: m, n, k, lda, lwork
        integer(c_int) :: res

        res = hipsolverSorgqr(handle, m, n, k, A, lda, tau, work, lwork, info)
    end function hipsolverSorgqrFortran

    ! Pass-through to hipsolverDorgqr.
    function hipsolverDorgqrFortran(handle, m, n, k, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverDorgqrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(c_int), value :: m, n, k, lda, lwork
        integer(c_int) :: res

        res = hipsolverDorgqr(handle, m, n, k, A, lda, tau, work, lwork, info)
    end function hipsolverDorgqrFortran

    ! Pass-through to hipsolverCungqr.
    function hipsolverCungqrFortran(handle, m, n, k, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverCungqrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(c_int), value :: m, n, k, lda, lwork
        integer(c_int) :: res

        res = hipsolverCungqr(handle, m, n, k, A, lda, tau, work, lwork, info)
    end function hipsolverCungqrFortran

    ! Pass-through to hipsolverZungqr.
    function hipsolverZungqrFortran(handle, m, n, k, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverZungqrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(c_int), value :: m, n, k, lda, lwork
        integer(c_int) :: res

        res = hipsolverZungqr(handle, m, n, k, A, lda, tau, work, lwork, info)
    end function hipsolverZungqrFortran

    ! ******************** ORGTR/UNGTR ********************

    ! Pass-through to hipsolverSorgtr_bufferSize.
    function hipsolverSorgtr_bufferSizeFortran(handle, uplo, n, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverSorgtr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n, lda
        integer(c_int) :: res

        res = hipsolverSorgtr_bufferSize(handle, uplo, n, A, lda, tau, lwork)
    end function hipsolverSorgtr_bufferSizeFortran

    ! Pass-through to hipsolverDorgtr_bufferSize.
    function hipsolverDorgtr_bufferSizeFortran(handle, uplo, n, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverDorgtr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n, lda
        integer(c_int) :: res

        res = hipsolverDorgtr_bufferSize(handle, uplo, n, A, lda, tau, lwork)
    end function hipsolverDorgtr_bufferSizeFortran

    ! Pass-through to hipsolverCungtr_bufferSize.
    function hipsolverCungtr_bufferSizeFortran(handle, uplo, n, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverCungtr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n, lda
        integer(c_int) :: res

        res = hipsolverCungtr_bufferSize(handle, uplo, n, A, lda, tau, lwork)
    end function hipsolverCungtr_bufferSizeFortran

    ! Pass-through to hipsolverZungtr_bufferSize.
    function hipsolverZungtr_bufferSizeFortran(handle, uplo, n, A, lda, tau, lwork) &
        result(res) bind(c, name = 'hipsolverZungtr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n, lda
        integer(c_int) :: res

        res = hipsolverZungtr_bufferSize(handle, uplo, n, A, lda, tau, lwork)
    end function hipsolverZungtr_bufferSizeFortran

    ! Pass-through to hipsolverSorgtr.
    function hipsolverSorgtrFortran(handle, uplo, n, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverSorgtrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n, lda, lwork
        integer(c_int) :: res

        res = hipsolverSorgtr(handle, uplo, n, A, lda, tau, work, lwork, info)
    end function hipsolverSorgtrFortran

    ! Pass-through to hipsolverDorgtr.
    function hipsolverDorgtrFortran(handle, uplo, n, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverDorgtrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n, lda, lwork
        integer(c_int) :: res

        res = hipsolverDorgtr(handle, uplo, n, A, lda, tau, work, lwork, info)
    end function hipsolverDorgtrFortran

    ! Pass-through to hipsolverCungtr.
    function hipsolverCungtrFortran(handle, uplo, n, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverCungtrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n, lda, lwork
        integer(c_int) :: res

        res = hipsolverCungtr(handle, uplo, n, A, lda, tau, work, lwork, info)
    end function hipsolverCungtrFortran

    ! Pass-through to hipsolverZungtr.
    function hipsolverZungtrFortran(handle, uplo, n, A, lda, tau, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverZungtrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, work, info
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n, lda, lwork
        integer(c_int) :: res

        res = hipsolverZungtr(handle, uplo, n, A, lda, tau, work, lwork, info)
    end function hipsolverZungtrFortran

    !
******************** ORMQR/UNMQR ********************
    ! Each function in this section is exported under its own name via bind(c),
    ! receives all of its arguments by value (pointers as opaque c_ptr), hands them
    ! unchanged to the hipsolver routine of the same name without the "Fortran"
    ! suffix, and returns that routine's result.

    ! Pass-through to hipsolverSormqr_bufferSize.
    function hipsolverSormqr_bufferSizeFortran(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork) &
        result(res) bind(c, name = 'hipsolverSormqr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, k, lda, ldc
        integer(c_int) :: res

        res = hipsolverSormqr_bufferSize(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork)
    end function hipsolverSormqr_bufferSizeFortran

    ! Pass-through to hipsolverDormqr_bufferSize.
    function hipsolverDormqr_bufferSizeFortran(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork) &
        result(res) bind(c, name = 'hipsolverDormqr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, k, lda, ldc
        integer(c_int) :: res

        res = hipsolverDormqr_bufferSize(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork)
    end function hipsolverDormqr_bufferSizeFortran

    ! Pass-through to hipsolverCunmqr_bufferSize.
    function hipsolverCunmqr_bufferSizeFortran(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork) &
        result(res) bind(c, name = 'hipsolverCunmqr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, k, lda, ldc
        integer(c_int) :: res

        res = hipsolverCunmqr_bufferSize(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork)
    end function hipsolverCunmqr_bufferSizeFortran

    ! Pass-through to hipsolverZunmqr_bufferSize.
    function hipsolverZunmqr_bufferSizeFortran(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork) &
        result(res) bind(c, name = 'hipsolverZunmqr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, k, lda, ldc
        integer(c_int) :: res

        res = hipsolverZunmqr_bufferSize(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork)
    end function hipsolverZunmqr_bufferSizeFortran

    ! Pass-through to hipsolverSormqr.
    function hipsolverSormqrFortran(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverSormqrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, k, lda, ldc, lwork
        integer(c_int) :: res

        res = hipsolverSormqr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info)
    end function hipsolverSormqrFortran

    ! Pass-through to hipsolverDormqr.
    function hipsolverDormqrFortran(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverDormqrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, k, lda, ldc, lwork
        integer(c_int) :: res

        res = hipsolverDormqr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info)
    end function hipsolverDormqrFortran

    ! Pass-through to hipsolverCunmqr.
    function hipsolverCunmqrFortran(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverCunmqrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, k, lda, ldc, lwork
        integer(c_int) :: res

        res = hipsolverCunmqr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info)
    end function hipsolverCunmqrFortran

    ! Pass-through to hipsolverZunmqr.
    function hipsolverZunmqrFortran(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverZunmqrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, k, lda, ldc, lwork
        integer(c_int) :: res

        res = hipsolverZunmqr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info)
    end function hipsolverZunmqrFortran

    ! ******************** ORMTR/UNMTR ********************

    ! Pass-through to hipsolverSormtr_bufferSize.
    function hipsolverSormtr_bufferSizeFortran(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork) &
        result(res) bind(c, name = 'hipsolverSormtr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, lda, ldc
        integer(c_int) :: res

        res = hipsolverSormtr_bufferSize(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork)
    end function hipsolverSormtr_bufferSizeFortran

    ! Pass-through to hipsolverDormtr_bufferSize.
    function hipsolverDormtr_bufferSizeFortran(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork) &
        result(res) bind(c, name = 'hipsolverDormtr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, lda, ldc
        integer(c_int) :: res

        res = hipsolverDormtr_bufferSize(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork)
    end function hipsolverDormtr_bufferSizeFortran

    ! Pass-through to hipsolverCunmtr_bufferSize.
    function hipsolverCunmtr_bufferSizeFortran(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork) &
        result(res) bind(c, name = 'hipsolverCunmtr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, lda, ldc
        integer(c_int) :: res

        res = hipsolverCunmtr_bufferSize(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork)
    end function hipsolverCunmtr_bufferSizeFortran

    ! Pass-through to hipsolverZunmtr_bufferSize.
    function hipsolverZunmtr_bufferSizeFortran(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork) &
        result(res) bind(c, name = 'hipsolverZunmtr_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, lwork
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, lda, ldc
        integer(c_int) :: res

        res = hipsolverZunmtr_bufferSize(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork)
    end function hipsolverZunmtr_bufferSizeFortran

    ! Pass-through to hipsolverSormtr.
    function hipsolverSormtrFortran(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverSormtrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, lda, ldc, lwork
        integer(c_int) :: res

        res = hipsolverSormtr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info)
    end function hipsolverSormtrFortran

    ! Pass-through to hipsolverDormtr.
    function hipsolverDormtrFortran(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverDormtrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, lda, ldc, lwork
        integer(c_int) :: res

        res = hipsolverDormtr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info)
    end function hipsolverDormtrFortran

    ! Pass-through to hipsolverCunmtr.
    function hipsolverCunmtrFortran(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverCunmtrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, lda, ldc, lwork
        integer(c_int) :: res

        res = hipsolverCunmtr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info)
    end function hipsolverCunmtrFortran

    ! Pass-through to hipsolverZunmtr.
    function hipsolverZunmtrFortran(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverZunmtrFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, tau, C, work, info
        integer(kind(HIPSOLVER_SIDE_LEFT)), value :: side
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(kind(HIPSOLVER_OP_N)), value :: trans
        integer(c_int), value :: m, n, lda, ldc, lwork
        integer(c_int) :: res

        res = hipsolverZunmtr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info)
    end function hipsolverZunmtrFortran

    ! ******************** GEBRD ********************

    ! Pass-through to hipsolverSgebrd_bufferSize.
    function hipsolverSgebrd_bufferSizeFortran(handle, m, n, lwork) &
        result(res) bind(c, name = 'hipsolverSgebrd_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, lwork
        integer(c_int), value :: m, n
        integer(c_int) :: res

        res = hipsolverSgebrd_bufferSize(handle, m, n, lwork)
    end function hipsolverSgebrd_bufferSizeFortran

    ! Pass-through to hipsolverDgebrd_bufferSize.
    function hipsolverDgebrd_bufferSizeFortran(handle, m, n, lwork) &
        result(res) bind(c, name = 'hipsolverDgebrd_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, lwork
        integer(c_int), value :: m, n
        integer(c_int) :: res

        res = hipsolverDgebrd_bufferSize(handle, m, n, lwork)
    end function hipsolverDgebrd_bufferSizeFortran

    ! Pass-through to hipsolverCgebrd_bufferSize.
    function hipsolverCgebrd_bufferSizeFortran(handle, m, n, lwork) &
        result(res) bind(c, name = 'hipsolverCgebrd_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, lwork
        integer(c_int), value :: m, n
        integer(c_int) :: res

        res = hipsolverCgebrd_bufferSize(handle, m, n, lwork)
    end function hipsolverCgebrd_bufferSizeFortran

    ! Pass-through to hipsolverZgebrd_bufferSize.
    function hipsolverZgebrd_bufferSizeFortran(handle, m, n, lwork) &
        result(res) bind(c, name = 'hipsolverZgebrd_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, lwork
        integer(c_int), value :: m, n
        integer(c_int) :: res

        res = hipsolverZgebrd_bufferSize(handle, m, n, lwork)
    end function hipsolverZgebrd_bufferSizeFortran

    ! Pass-through to hipsolverSgebrd.
    function hipsolverSgebrdFortran(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverSgebrdFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tauq, taup, work, info
        integer(c_int), value :: m, n, lda, lwork
        integer(c_int) :: res

        res = hipsolverSgebrd(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info)
    end function hipsolverSgebrdFortran

    ! Pass-through to hipsolverDgebrd.
    function hipsolverDgebrdFortran(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverDgebrdFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tauq, taup, work, info
        integer(c_int), value :: m, n, lda, lwork
        integer(c_int) :: res

        res = hipsolverDgebrd(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info)
    end function hipsolverDgebrdFortran

    ! Pass-through to hipsolverCgebrd.
    function hipsolverCgebrdFortran(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverCgebrdFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tauq, taup, work, info
        integer(c_int), value :: m, n, lda, lwork
        integer(c_int) :: res

        res = hipsolverCgebrd(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info)
    end function hipsolverCgebrdFortran

    ! Pass-through to hipsolverZgebrd.
    function hipsolverZgebrdFortran(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverZgebrdFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tauq, taup, work, info
        integer(c_int), value :: m, n, lda, lwork
        integer(c_int) :: res

        res = hipsolverZgebrd(handle, m, n, A, lda, D, E, tauq, taup, work, lwork, info)
    end function hipsolverZgebrdFortran

    !
! ******************** GELS ********************
! C-callable wrappers for the GELS routines. Each function is exported under
! the C symbol given in its bind(c) clause, receives every argument by value,
! forwards the arguments unchanged to the matching hipsolver routine, and
! returns that call's integer(c_int) result.

! Forwards to hipsolverSSgels_bufferSize.
function hipsolverSSgels_bufferSizeFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork) &
        bind(c, name = 'hipsolverSSgels_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, X, lwork
    integer(c_int), value :: m, n, nrhs, lda, ldb, ldx
    integer(c_int) :: ret

    ret = hipsolverSSgels_bufferSize(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork)
end function hipsolverSSgels_bufferSizeFortran

! Forwards to hipsolverDDgels_bufferSize.
function hipsolverDDgels_bufferSizeFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork) &
        bind(c, name = 'hipsolverDDgels_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, X, lwork
    integer(c_int), value :: m, n, nrhs, lda, ldb, ldx
    integer(c_int) :: ret

    ret = hipsolverDDgels_bufferSize(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork)
end function hipsolverDDgels_bufferSizeFortran

! Forwards to hipsolverCCgels_bufferSize.
function hipsolverCCgels_bufferSizeFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork) &
        bind(c, name = 'hipsolverCCgels_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, X, lwork
    integer(c_int), value :: m, n, nrhs, lda, ldb, ldx
    integer(c_int) :: ret

    ret = hipsolverCCgels_bufferSize(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork)
end function hipsolverCCgels_bufferSizeFortran

! Forwards to hipsolverZZgels_bufferSize.
function hipsolverZZgels_bufferSizeFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork) &
        bind(c, name = 'hipsolverZZgels_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, X, lwork
    integer(c_int), value :: m, n, nrhs, lda, ldb, ldx
    integer(c_int) :: ret

    ret = hipsolverZZgels_bufferSize(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, lwork)
end function hipsolverZZgels_bufferSizeFortran

! Forwards to hipsolverSSgels.
function hipsolverSSgelsFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, &
                                work, lwork, niters, info) &
        bind(c, name = 'hipsolverSSgelsFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, X, work, niters, info
    integer(c_int), value :: m, n, nrhs, lda, ldb, ldx, lwork
    integer(c_int) :: ret

    ret = hipsolverSSgels(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info)
end function hipsolverSSgelsFortran

! Forwards to hipsolverDDgels.
function hipsolverDDgelsFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, &
                                work, lwork, niters, info) &
        bind(c, name = 'hipsolverDDgelsFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, X, work, niters, info
    integer(c_int), value :: m, n, nrhs, lda, ldb, ldx, lwork
    integer(c_int) :: ret

    ret = hipsolverDDgels(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info)
end function hipsolverDDgelsFortran

! Forwards to hipsolverCCgels.
function hipsolverCCgelsFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, &
                                work, lwork, niters, info) &
        bind(c, name = 'hipsolverCCgelsFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, X, work, niters, info
    integer(c_int), value :: m, n, nrhs, lda, ldb, ldx, lwork
    integer(c_int) :: ret

    ret = hipsolverCCgels(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info)
end function hipsolverCCgelsFortran

! Forwards to hipsolverZZgels.
function hipsolverZZgelsFortran(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, &
                                work, lwork, niters, info) &
        bind(c, name = 'hipsolverZZgelsFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, X, work, niters, info
    integer(c_int), value :: m, n, nrhs, lda, ldb, ldx, lwork
    integer(c_int) :: ret

    ret = hipsolverZZgels(handle, m, n, nrhs, A, lda, B, ldb, X, ldx, work, lwork, niters, info)
end function hipsolverZZgelsFortran !
! ******************** GEQRF ********************
! C-callable wrappers for the GEQRF buffer-size routines. Each function is
! exported under the C symbol given in its bind(c) clause, receives every
! argument by value, forwards the arguments unchanged to the matching
! hipsolver routine, and returns that call's integer(c_int) result.

! Forwards to hipsolverSgeqrf_bufferSize.
function hipsolverSgeqrf_bufferSizeFortran(handle, m, n, A, lda, lwork) &
        bind(c, name = 'hipsolverSgeqrf_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(c_int), value :: m, n, lda
    integer(c_int) :: ret

    ret = hipsolverSgeqrf_bufferSize(handle, m, n, A, lda, lwork)
end function hipsolverSgeqrf_bufferSizeFortran

! Forwards to hipsolverDgeqrf_bufferSize.
function hipsolverDgeqrf_bufferSizeFortran(handle, m, n, A, lda, lwork) &
        bind(c, name = 'hipsolverDgeqrf_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(c_int), value :: m, n, lda
    integer(c_int) :: ret

    ret = hipsolverDgeqrf_bufferSize(handle, m, n, A, lda, lwork)
end function hipsolverDgeqrf_bufferSizeFortran

! Forwards to hipsolverCgeqrf_bufferSize.
function hipsolverCgeqrf_bufferSizeFortran(handle, m, n, A, lda, lwork) &
        bind(c, name = 'hipsolverCgeqrf_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(c_int), value :: m, n, lda
    integer(c_int) :: ret

    ret = hipsolverCgeqrf_bufferSize(handle, m, n, A, lda, lwork)
end function hipsolverCgeqrf_bufferSizeFortran

! Forwards to hipsolverZgeqrf_bufferSize.
function hipsolverZgeqrf_bufferSizeFortran(handle, m, n, A, lda, lwork) &
        bind(c, name = 'hipsolverZgeqrf_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(c_int), value :: m, n, lda
    integer(c_int) :: ret

    ret = hipsolverZgeqrf_bufferSize(handle, m, n, A, lda, lwork)
end function hipsolverZgeqrf_bufferSizeFortran
! C-callable wrappers for GEQRF, GESV and GESVD. Each function is exported
! under the C symbol given in its bind(c) clause, receives every argument by
! value, forwards the arguments unchanged to the matching hipsolver routine,
! and returns that call's integer(c_int) result.

! Forwards to hipsolverSgeqrf.
function hipsolverSgeqrfFortran(handle, m, n, A, lda, tau, work, lwork, info) &
        bind(c, name = 'hipsolverSgeqrfFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, tau, work, info
    integer(c_int), value :: m, n, lda, lwork
    integer(c_int) :: ret

    ret = hipsolverSgeqrf(handle, m, n, A, lda, tau, work, lwork, info)
end function hipsolverSgeqrfFortran

! Forwards to hipsolverDgeqrf.
function hipsolverDgeqrfFortran(handle, m, n, A, lda, tau, work, lwork, info) &
        bind(c, name = 'hipsolverDgeqrfFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, tau, work, info
    integer(c_int), value :: m, n, lda, lwork
    integer(c_int) :: ret

    ret = hipsolverDgeqrf(handle, m, n, A, lda, tau, work, lwork, info)
end function hipsolverDgeqrfFortran

! Forwards to hipsolverCgeqrf.
function hipsolverCgeqrfFortran(handle, m, n, A, lda, tau, work, lwork, info) &
        bind(c, name = 'hipsolverCgeqrfFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, tau, work, info
    integer(c_int), value :: m, n, lda, lwork
    integer(c_int) :: ret

    ret = hipsolverCgeqrf(handle, m, n, A, lda, tau, work, lwork, info)
end function hipsolverCgeqrfFortran

! Forwards to hipsolverZgeqrf.
function hipsolverZgeqrfFortran(handle, m, n, A, lda, tau, work, lwork, info) &
        bind(c, name = 'hipsolverZgeqrfFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, tau, work, info
    integer(c_int), value :: m, n, lda, lwork
    integer(c_int) :: ret

    ret = hipsolverZgeqrf(handle, m, n, A, lda, tau, work, lwork, info)
end function hipsolverZgeqrfFortran

! ******************** GESV ********************

! Forwards to hipsolverSSgesv_bufferSize.
function hipsolverSSgesv_bufferSizeFortran(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork) &
        bind(c, name = 'hipsolverSSgesv_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, X, lwork
    integer(c_int), value :: n, nrhs, lda, ldb, ldx
    integer(c_int) :: ret

    ret = hipsolverSSgesv_bufferSize(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork)
end function hipsolverSSgesv_bufferSizeFortran

! Forwards to hipsolverDDgesv_bufferSize.
function hipsolverDDgesv_bufferSizeFortran(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork) &
        bind(c, name = 'hipsolverDDgesv_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, X, lwork
    integer(c_int), value :: n, nrhs, lda, ldb, ldx
    integer(c_int) :: ret

    ret = hipsolverDDgesv_bufferSize(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork)
end function hipsolverDDgesv_bufferSizeFortran

! Forwards to hipsolverCCgesv_bufferSize.
function hipsolverCCgesv_bufferSizeFortran(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork) &
        bind(c, name = 'hipsolverCCgesv_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, X, lwork
    integer(c_int), value :: n, nrhs, lda, ldb, ldx
    integer(c_int) :: ret

    ret = hipsolverCCgesv_bufferSize(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork)
end function hipsolverCCgesv_bufferSizeFortran

! Forwards to hipsolverZZgesv_bufferSize.
function hipsolverZZgesv_bufferSizeFortran(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork) &
        bind(c, name = 'hipsolverZZgesv_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, X, lwork
    integer(c_int), value :: n, nrhs, lda, ldb, ldx
    integer(c_int) :: ret

    ret = hipsolverZZgesv_bufferSize(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, lwork)
end function hipsolverZZgesv_bufferSizeFortran

! Forwards to hipsolverSSgesv.
function hipsolverSSgesvFortran(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, &
                                work, lwork, niters, info) &
        bind(c, name = 'hipsolverSSgesvFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, X, work, niters, info
    integer(c_int), value :: n, nrhs, lda, ldb, ldx, lwork
    integer(c_int) :: ret

    ret = hipsolverSSgesv(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info)
end function hipsolverSSgesvFortran

! Forwards to hipsolverDDgesv.
function hipsolverDDgesvFortran(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, &
                                work, lwork, niters, info) &
        bind(c, name = 'hipsolverDDgesvFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, X, work, niters, info
    integer(c_int), value :: n, nrhs, lda, ldb, ldx, lwork
    integer(c_int) :: ret

    ret = hipsolverDDgesv(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info)
end function hipsolverDDgesvFortran

! Forwards to hipsolverCCgesv.
function hipsolverCCgesvFortran(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, &
                                work, lwork, niters, info) &
        bind(c, name = 'hipsolverCCgesvFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, X, work, niters, info
    integer(c_int), value :: n, nrhs, lda, ldb, ldx, lwork
    integer(c_int) :: ret

    ret = hipsolverCCgesv(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info)
end function hipsolverCCgesvFortran

! Forwards to hipsolverZZgesv.
function hipsolverZZgesvFortran(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, &
                                work, lwork, niters, info) &
        bind(c, name = 'hipsolverZZgesvFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, X, work, niters, info
    integer(c_int), value :: n, nrhs, lda, ldb, ldx, lwork
    integer(c_int) :: ret

    ret = hipsolverZZgesv(handle, n, nrhs, A, lda, ipiv, B, ldb, X, ldx, work, lwork, niters, info)
end function hipsolverZZgesvFortran

! ******************** GESVD ********************
! jobu and jobv are received as integer(c_signed_char) values.

! Forwards to hipsolverSgesvd_bufferSize.
function hipsolverSgesvd_bufferSizeFortran(handle, jobu, jobv, m, n, lwork) &
        bind(c, name = 'hipsolverSgesvd_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, lwork
    integer(c_signed_char), value :: jobu, jobv
    integer(c_int), value :: m, n
    integer(c_int) :: ret

    ret = hipsolverSgesvd_bufferSize(handle, jobu, jobv, m, n, lwork)
end function hipsolverSgesvd_bufferSizeFortran

! Forwards to hipsolverDgesvd_bufferSize.
function hipsolverDgesvd_bufferSizeFortran(handle, jobu, jobv, m, n, lwork) &
        bind(c, name = 'hipsolverDgesvd_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, lwork
    integer(c_signed_char), value :: jobu, jobv
    integer(c_int), value :: m, n
    integer(c_int) :: ret

    ret = hipsolverDgesvd_bufferSize(handle, jobu, jobv, m, n, lwork)
end function hipsolverDgesvd_bufferSizeFortran

! Forwards to hipsolverCgesvd_bufferSize.
function hipsolverCgesvd_bufferSizeFortran(handle, jobu, jobv, m, n, lwork) &
        bind(c, name = 'hipsolverCgesvd_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, lwork
    integer(c_signed_char), value :: jobu, jobv
    integer(c_int), value :: m, n
    integer(c_int) :: ret

    ret = hipsolverCgesvd_bufferSize(handle, jobu, jobv, m, n, lwork)
end function hipsolverCgesvd_bufferSizeFortran

! Forwards to hipsolverZgesvd_bufferSize.
function hipsolverZgesvd_bufferSizeFortran(handle, jobu, jobv, m, n, lwork) &
        bind(c, name = 'hipsolverZgesvd_bufferSizeFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, lwork
    integer(c_signed_char), value :: jobu, jobv
    integer(c_int), value :: m, n
    integer(c_int) :: ret

    ret = hipsolverZgesvd_bufferSize(handle, jobu, jobv, m, n, lwork)
end function hipsolverZgesvd_bufferSizeFortran

! Forwards to hipsolverSgesvd.
function hipsolverSgesvdFortran(handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, &
                                work, lwork, rwork, info) &
        bind(c, name = 'hipsolverSgesvdFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, S, U, V, work, rwork, info
    integer(c_signed_char), value :: jobu, jobv
    integer(c_int), value :: m, n, lda, ldu, ldv, lwork
    integer(c_int) :: ret

    ret = hipsolverSgesvd(handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info)
end function hipsolverSgesvdFortran

! Forwards to hipsolverDgesvd.
function hipsolverDgesvdFortran(handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, &
                                work, lwork, rwork, info) &
        bind(c, name = 'hipsolverDgesvdFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, S, U, V, work, rwork, info
    integer(c_signed_char), value :: jobu, jobv
    integer(c_int), value :: m, n, lda, ldu, ldv, lwork
    integer(c_int) :: ret

    ret = hipsolverDgesvd(handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info)
end function hipsolverDgesvdFortran

! Forwards to hipsolverCgesvd.
function hipsolverCgesvdFortran(handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, &
                                work, lwork, rwork, info) &
        bind(c, name = 'hipsolverCgesvdFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, S, U, V, work, rwork, info
    integer(c_signed_char), value :: jobu, jobv
    integer(c_int), value :: m, n, lda, ldu, ldv, lwork
    integer(c_int) :: ret

    ret = hipsolverCgesvd(handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info)
end function hipsolverCgesvdFortran

! Forwards to hipsolverZgesvd.
function hipsolverZgesvdFortran(handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, &
                                work, lwork, rwork, info) &
        bind(c, name = 'hipsolverZgesvdFortran') result(ret)
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, S, U, V, work, rwork, info
    integer(c_signed_char), value :: jobu, jobv
    integer(c_int), value :: m, n, lda, ldu, ldv, lwork
    integer(c_int) :: ret

    ret = hipsolverZgesvd(handle, jobu, jobv, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, rwork, info)
end function hipsolverZgesvdFortran !
******************** GESVDJ ******************** function hipsolverSgesvdj_bufferSizeFortran(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params) & result(res) & bind(c, name = 'hipsolverSgesvdj_bufferSizeFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: econ integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: lwork type(c_ptr), value :: params integer(c_int) :: res res = hipsolverSgesvdj_bufferSize(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params) end function hipsolverSgesvdj_bufferSizeFortran function hipsolverDgesvdj_bufferSizeFortran(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params) & result(res) & bind(c, name = 'hipsolverDgesvdj_bufferSizeFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: econ integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: lwork type(c_ptr), value :: params integer(c_int) :: res res = hipsolverDgesvdj_bufferSize(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params) end function hipsolverDgesvdj_bufferSizeFortran function hipsolverCgesvdj_bufferSizeFortran(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params) & result(res) & bind(c, name = 'hipsolverCgesvdj_bufferSizeFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: 
jobz integer(c_int), value :: econ integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: lwork type(c_ptr), value :: params integer(c_int) :: res res = hipsolverCgesvdj_bufferSize(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params) end function hipsolverCgesvdj_bufferSizeFortran function hipsolverZgesvdj_bufferSizeFortran(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params) & result(res) & bind(c, name = 'hipsolverZgesvdj_bufferSizeFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: econ integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: lwork type(c_ptr), value :: params integer(c_int) :: res res = hipsolverZgesvdj_bufferSize(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, params) end function hipsolverZgesvdj_bufferSizeFortran function hipsolverSgesvdjFortran(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params) & result(res) & bind(c, name = 'hipsolverSgesvdjFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: econ integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value 
:: params integer(c_int) :: res res = hipsolverSgesvdj(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params) end function hipsolverSgesvdjFortran function hipsolverDgesvdjFortran(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params) & result(res) & bind(c, name = 'hipsolverDgesvdjFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: econ integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int) :: res res = hipsolverDgesvdj(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params) end function hipsolverDgesvdjFortran function hipsolverCgesvdjFortran(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params) & result(res) & bind(c, name = 'hipsolverCgesvdjFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: econ integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int) :: res res = hipsolverCgesvdj(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params) end function hipsolverCgesvdjFortran function hipsolverZgesvdjFortran(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params) & 
result(res) & bind(c, name = 'hipsolverZgesvdjFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: econ integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int) :: res res = hipsolverZgesvdj(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params) end function hipsolverZgesvdjFortran ! ******************** GESVDJ_BATCHED ******************** function hipsolverSgesvdjBatched_bufferSizeFortran(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, batch_count) & result(res) & bind(c, name = 'hipsolverSgesvdjBatched_bufferSizeFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: lwork type(c_ptr), value :: params integer(c_int), value :: batch_count integer(c_int) :: res res = hipsolverSgesvdjBatched_bufferSize(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, batch_count) end function hipsolverSgesvdjBatched_bufferSizeFortran function hipsolverDgesvdjBatched_bufferSizeFortran(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, batch_count) & result(res) & bind(c, name = 'hipsolverDgesvdjBatched_bufferSizeFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: 
jobz integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: lwork type(c_ptr), value :: params integer(c_int), value :: batch_count integer(c_int) :: res res = hipsolverDgesvdjBatched_bufferSize(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, batch_count) end function hipsolverDgesvdjBatched_bufferSizeFortran function hipsolverCgesvdjBatched_bufferSizeFortran(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, batch_count) & result(res) & bind(c, name = 'hipsolverCgesvdjBatched_bufferSizeFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: lwork type(c_ptr), value :: params integer(c_int), value :: batch_count integer(c_int) :: res res = hipsolverCgesvdjBatched_bufferSize(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, batch_count) end function hipsolverCgesvdjBatched_bufferSizeFortran function hipsolverZgesvdjBatched_bufferSizeFortran(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, batch_count) & result(res) & bind(c, name = 'hipsolverZgesvdjBatched_bufferSizeFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: lwork 
type(c_ptr), value :: params integer(c_int), value :: batch_count integer(c_int) :: res res = hipsolverZgesvdjBatched_bufferSize(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, batch_count) end function hipsolverZgesvdjBatched_bufferSizeFortran function hipsolverSgesvdjBatchedFortran(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, batch_count) & result(res) & bind(c, name = 'hipsolverSgesvdjBatchedFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int), value :: batch_count integer(c_int) :: res res = hipsolverSgesvdjBatched(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, batch_count) end function hipsolverSgesvdjBatchedFortran function hipsolverDgesvdjBatchedFortran(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, batch_count) & result(res) & bind(c, name = 'hipsolverDgesvdjBatchedFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int), value :: batch_count integer(c_int) :: res res = hipsolverDgesvdjBatched(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, 
params, batch_count) end function hipsolverDgesvdjBatchedFortran function hipsolverCgesvdjBatchedFortran(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, batch_count) & result(res) & bind(c, name = 'hipsolverCgesvdjBatchedFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int), value :: batch_count integer(c_int) :: res res = hipsolverCgesvdjBatched(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, batch_count) end function hipsolverCgesvdjBatchedFortran function hipsolverZgesvdjBatchedFortran(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, batch_count) & result(res) & bind(c, name = 'hipsolverZgesvdjBatchedFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(c_int), value :: m integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: S type(c_ptr), value :: U integer(c_int), value :: ldu type(c_ptr), value :: V integer(c_int), value :: ldv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int), value :: batch_count integer(c_int) :: res res = hipsolverZgesvdjBatched(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params, batch_count) end function hipsolverZgesvdjBatchedFortran ! 
! ******************** GETRF ********************
! The four *_bufferSizeFortran functions below are C-interoperable shims. Each
! one passes its arguments, unmodified and in the same order, to the hipsolver
! routine of the same name without the "Fortran" suffix and returns that
! routine's integer result.

! C-callable shim around hipsolverSgetrf_bufferSize.
function hipsolverSgetrf_bufferSizeFortran(handle, m, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverSgetrf_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    ! C pointers, received by value
    type(c_ptr), value :: handle, A, lwork
    ! C ints, received by value
    integer(c_int), value :: m, n, lda
    integer(c_int) :: res

    res = hipsolverSgetrf_bufferSize(handle, m, n, A, lda, lwork)
end function hipsolverSgetrf_bufferSizeFortran

! C-callable shim around hipsolverDgetrf_bufferSize.
function hipsolverDgetrf_bufferSizeFortran(handle, m, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverDgetrf_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(c_int), value :: m, n, lda
    integer(c_int) :: res

    res = hipsolverDgetrf_bufferSize(handle, m, n, A, lda, lwork)
end function hipsolverDgetrf_bufferSizeFortran

! C-callable shim around hipsolverCgetrf_bufferSize.
function hipsolverCgetrf_bufferSizeFortran(handle, m, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverCgetrf_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(c_int), value :: m, n, lda
    integer(c_int) :: res

    res = hipsolverCgetrf_bufferSize(handle, m, n, A, lda, lwork)
end function hipsolverCgetrf_bufferSizeFortran

! C-callable shim around hipsolverZgetrf_bufferSize.
function hipsolverZgetrf_bufferSizeFortran(handle, m, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverZgetrf_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(c_int), value :: m, n, lda
    integer(c_int) :: res

    res = hipsolverZgetrf_bufferSize(handle, m, n, A, lda, lwork)
end function hipsolverZgetrf_bufferSizeFortran
! The wrappers below are C-interoperable shims: each forwards its arguments,
! unmodified and in order, to the identically named hipsolver routine (minus the
! "Fortran" suffix) and returns the integer value that routine yields.

! C-callable shim around hipsolverSgetrf.
function hipsolverSgetrfFortran(handle, m, n, A, lda, work, lwork, ipiv, info) &
    result(res) bind(c, name = 'hipsolverSgetrfFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    ! C pointers, received by value
    type(c_ptr), value :: handle, A, work, ipiv, info
    ! C ints, received by value
    integer(c_int), value :: m, n, lda, lwork
    integer(c_int) :: res

    res = hipsolverSgetrf(handle, m, n, A, lda, work, lwork, ipiv, info)
end function hipsolverSgetrfFortran

! C-callable shim around hipsolverDgetrf.
function hipsolverDgetrfFortran(handle, m, n, A, lda, work, lwork, ipiv, info) &
    result(res) bind(c, name = 'hipsolverDgetrfFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, ipiv, info
    integer(c_int), value :: m, n, lda, lwork
    integer(c_int) :: res

    res = hipsolverDgetrf(handle, m, n, A, lda, work, lwork, ipiv, info)
end function hipsolverDgetrfFortran

! C-callable shim around hipsolverCgetrf.
function hipsolverCgetrfFortran(handle, m, n, A, lda, work, lwork, ipiv, info) &
    result(res) bind(c, name = 'hipsolverCgetrfFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, ipiv, info
    integer(c_int), value :: m, n, lda, lwork
    integer(c_int) :: res

    res = hipsolverCgetrf(handle, m, n, A, lda, work, lwork, ipiv, info)
end function hipsolverCgetrfFortran

! C-callable shim around hipsolverZgetrf.
function hipsolverZgetrfFortran(handle, m, n, A, lda, work, lwork, ipiv, info) &
    result(res) bind(c, name = 'hipsolverZgetrfFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, ipiv, info
    integer(c_int), value :: m, n, lda, lwork
    integer(c_int) :: res

    res = hipsolverZgetrf(handle, m, n, A, lda, work, lwork, ipiv, info)
end function hipsolverZgetrfFortran

! ******************** GETRS ********************

! C-callable shim around hipsolverSgetrs_bufferSize.
function hipsolverSgetrs_bufferSizeFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork) &
    result(res) bind(c, name = 'hipsolverSgetrs_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, lwork
    ! trans takes the integer kind of the HIPSOLVER_OP_N enumerator
    integer(kind(HIPSOLVER_OP_N)), value :: trans
    integer(c_int), value :: n, nrhs, lda, ldb
    integer(c_int) :: res

    res = hipsolverSgetrs_bufferSize(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork)
end function hipsolverSgetrs_bufferSizeFortran

! C-callable shim around hipsolverDgetrs_bufferSize.
function hipsolverDgetrs_bufferSizeFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork) &
    result(res) bind(c, name = 'hipsolverDgetrs_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, lwork
    integer(kind(HIPSOLVER_OP_N)), value :: trans
    integer(c_int), value :: n, nrhs, lda, ldb
    integer(c_int) :: res

    res = hipsolverDgetrs_bufferSize(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork)
end function hipsolverDgetrs_bufferSizeFortran

! C-callable shim around hipsolverCgetrs_bufferSize.
function hipsolverCgetrs_bufferSizeFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork) &
    result(res) bind(c, name = 'hipsolverCgetrs_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, lwork
    integer(kind(HIPSOLVER_OP_N)), value :: trans
    integer(c_int), value :: n, nrhs, lda, ldb
    integer(c_int) :: res

    res = hipsolverCgetrs_bufferSize(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork)
end function hipsolverCgetrs_bufferSizeFortran

! C-callable shim around hipsolverZgetrs_bufferSize.
function hipsolverZgetrs_bufferSizeFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork) &
    result(res) bind(c, name = 'hipsolverZgetrs_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, lwork
    integer(kind(HIPSOLVER_OP_N)), value :: trans
    integer(c_int), value :: n, nrhs, lda, ldb
    integer(c_int) :: res

    res = hipsolverZgetrs_bufferSize(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, lwork)
end function hipsolverZgetrs_bufferSizeFortran

! C-callable shim around hipsolverSgetrs.
function hipsolverSgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverSgetrsFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, work, info
    integer(kind(HIPSOLVER_OP_N)), value :: trans
    integer(c_int), value :: n, nrhs, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverSgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info)
end function hipsolverSgetrsFortran

! C-callable shim around hipsolverDgetrs.
function hipsolverDgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverDgetrsFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, work, info
    integer(kind(HIPSOLVER_OP_N)), value :: trans
    integer(c_int), value :: n, nrhs, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverDgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info)
end function hipsolverDgetrsFortran

! C-callable shim around hipsolverCgetrs.
function hipsolverCgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverCgetrsFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, work, info
    integer(kind(HIPSOLVER_OP_N)), value :: trans
    integer(c_int), value :: n, nrhs, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverCgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info)
end function hipsolverCgetrsFortran

! C-callable shim around hipsolverZgetrs.
function hipsolverZgetrsFortran(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverZgetrsFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, ipiv, B, work, info
    integer(kind(HIPSOLVER_OP_N)), value :: trans
    integer(c_int), value :: n, nrhs, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverZgetrs(handle, trans, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, info)
end function hipsolverZgetrsFortran
! ******************** POTRF ********************
! The wrappers below are C-interoperable shims: each forwards its arguments,
! unmodified and in order, to the identically named hipsolver routine (minus the
! "Fortran" suffix) and returns the integer value that routine yields.

! C-callable shim around hipsolverSpotrf_bufferSize.
function hipsolverSpotrf_bufferSizeFortran(handle, uplo, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverSpotrf_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    ! C pointers, received by value
    type(c_ptr), value :: handle, A, lwork
    ! uplo takes the integer kind of the HIPSOLVER_FILL_MODE_LOWER enumerator
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    ! C ints, received by value
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverSpotrf_bufferSize(handle, uplo, n, A, lda, lwork)
end function hipsolverSpotrf_bufferSizeFortran

! C-callable shim around hipsolverDpotrf_bufferSize.
function hipsolverDpotrf_bufferSizeFortran(handle, uplo, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverDpotrf_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverDpotrf_bufferSize(handle, uplo, n, A, lda, lwork)
end function hipsolverDpotrf_bufferSizeFortran

! C-callable shim around hipsolverCpotrf_bufferSize.
function hipsolverCpotrf_bufferSizeFortran(handle, uplo, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverCpotrf_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverCpotrf_bufferSize(handle, uplo, n, A, lda, lwork)
end function hipsolverCpotrf_bufferSizeFortran

! C-callable shim around hipsolverZpotrf_bufferSize.
function hipsolverZpotrf_bufferSizeFortran(handle, uplo, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverZpotrf_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverZpotrf_bufferSize(handle, uplo, n, A, lda, lwork)
end function hipsolverZpotrf_bufferSizeFortran

! C-callable shim around hipsolverSpotrf.
function hipsolverSpotrfFortran(handle, uplo, n, A, lda, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverSpotrfFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverSpotrf(handle, uplo, n, A, lda, work, lwork, info)
end function hipsolverSpotrfFortran

! C-callable shim around hipsolverDpotrf.
function hipsolverDpotrfFortran(handle, uplo, n, A, lda, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverDpotrfFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverDpotrf(handle, uplo, n, A, lda, work, lwork, info)
end function hipsolverDpotrfFortran

! C-callable shim around hipsolverCpotrf.
function hipsolverCpotrfFortran(handle, uplo, n, A, lda, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverCpotrfFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverCpotrf(handle, uplo, n, A, lda, work, lwork, info)
end function hipsolverCpotrfFortran

! C-callable shim around hipsolverZpotrf.
function hipsolverZpotrfFortran(handle, uplo, n, A, lda, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverZpotrfFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverZpotrf(handle, uplo, n, A, lda, work, lwork, info)
end function hipsolverZpotrfFortran

! ******************** POTRF_BATCHED ********************

! C-callable shim around hipsolverSpotrfBatched_bufferSize.
function hipsolverSpotrfBatched_bufferSizeFortran(handle, uplo, n, A, lda, lwork, batch_count) &
    result(res) bind(c, name = 'hipsolverSpotrfBatched_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, batch_count
    integer(c_int) :: res

    res = hipsolverSpotrfBatched_bufferSize(handle, uplo, n, A, lda, lwork, batch_count)
end function hipsolverSpotrfBatched_bufferSizeFortran

! C-callable shim around hipsolverDpotrfBatched_bufferSize.
function hipsolverDpotrfBatched_bufferSizeFortran(handle, uplo, n, A, lda, lwork, batch_count) &
    result(res) bind(c, name = 'hipsolverDpotrfBatched_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, batch_count
    integer(c_int) :: res

    res = hipsolverDpotrfBatched_bufferSize(handle, uplo, n, A, lda, lwork, batch_count)
end function hipsolverDpotrfBatched_bufferSizeFortran

! C-callable shim around hipsolverCpotrfBatched_bufferSize.
function hipsolverCpotrfBatched_bufferSizeFortran(handle, uplo, n, A, lda, lwork, batch_count) &
    result(res) bind(c, name = 'hipsolverCpotrfBatched_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, batch_count
    integer(c_int) :: res

    res = hipsolverCpotrfBatched_bufferSize(handle, uplo, n, A, lda, lwork, batch_count)
end function hipsolverCpotrfBatched_bufferSizeFortran

! C-callable shim around hipsolverZpotrfBatched_bufferSize.
function hipsolverZpotrfBatched_bufferSizeFortran(handle, uplo, n, A, lda, lwork, batch_count) &
    result(res) bind(c, name = 'hipsolverZpotrfBatched_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, batch_count
    integer(c_int) :: res

    res = hipsolverZpotrfBatched_bufferSize(handle, uplo, n, A, lda, lwork, batch_count)
end function hipsolverZpotrfBatched_bufferSizeFortran

! C-callable shim around hipsolverSpotrfBatched.
function hipsolverSpotrfBatchedFortran(handle, uplo, n, A, lda, work, lwork, info, batch_count) &
    result(res) bind(c, name = 'hipsolverSpotrfBatchedFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverSpotrfBatched(handle, uplo, n, A, lda, work, lwork, info, batch_count)
end function hipsolverSpotrfBatchedFortran

! C-callable shim around hipsolverDpotrfBatched.
function hipsolverDpotrfBatchedFortran(handle, uplo, n, A, lda, work, lwork, info, batch_count) &
    result(res) bind(c, name = 'hipsolverDpotrfBatchedFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverDpotrfBatched(handle, uplo, n, A, lda, work, lwork, info, batch_count)
end function hipsolverDpotrfBatchedFortran

! C-callable shim around hipsolverCpotrfBatched.
function hipsolverCpotrfBatchedFortran(handle, uplo, n, A, lda, work, lwork, info, batch_count) &
    result(res) bind(c, name = 'hipsolverCpotrfBatchedFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverCpotrfBatched(handle, uplo, n, A, lda, work, lwork, info, batch_count)
end function hipsolverCpotrfBatchedFortran

! C-callable shim around hipsolverZpotrfBatched.
function hipsolverZpotrfBatchedFortran(handle, uplo, n, A, lda, work, lwork, info, batch_count) &
    result(res) bind(c, name = 'hipsolverZpotrfBatchedFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverZpotrfBatched(handle, uplo, n, A, lda, work, lwork, info, batch_count)
end function hipsolverZpotrfBatchedFortran
! ******************** POTRI ********************
! The wrappers below are C-interoperable shims: each forwards its arguments,
! unmodified and in order, to the identically named hipsolver routine (minus the
! "Fortran" suffix) and returns the integer value that routine yields.

! C-callable shim around hipsolverSpotri_bufferSize.
function hipsolverSpotri_bufferSizeFortran(handle, uplo, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverSpotri_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    ! C pointers, received by value
    type(c_ptr), value :: handle, A, lwork
    ! uplo takes the integer kind of the HIPSOLVER_FILL_MODE_LOWER enumerator
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    ! C ints, received by value
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverSpotri_bufferSize(handle, uplo, n, A, lda, lwork)
end function hipsolverSpotri_bufferSizeFortran

! C-callable shim around hipsolverDpotri_bufferSize.
function hipsolverDpotri_bufferSizeFortran(handle, uplo, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverDpotri_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverDpotri_bufferSize(handle, uplo, n, A, lda, lwork)
end function hipsolverDpotri_bufferSizeFortran

! C-callable shim around hipsolverCpotri_bufferSize.
function hipsolverCpotri_bufferSizeFortran(handle, uplo, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverCpotri_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverCpotri_bufferSize(handle, uplo, n, A, lda, lwork)
end function hipsolverCpotri_bufferSizeFortran

! C-callable shim around hipsolverZpotri_bufferSize.
function hipsolverZpotri_bufferSizeFortran(handle, uplo, n, A, lda, lwork) &
    result(res) bind(c, name = 'hipsolverZpotri_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverZpotri_bufferSize(handle, uplo, n, A, lda, lwork)
end function hipsolverZpotri_bufferSizeFortran

! C-callable shim around hipsolverSpotri.
function hipsolverSpotriFortran(handle, uplo, n, A, lda, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverSpotriFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverSpotri(handle, uplo, n, A, lda, work, lwork, info)
end function hipsolverSpotriFortran

! C-callable shim around hipsolverDpotri.
function hipsolverDpotriFortran(handle, uplo, n, A, lda, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverDpotriFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverDpotri(handle, uplo, n, A, lda, work, lwork, info)
end function hipsolverDpotriFortran

! C-callable shim around hipsolverCpotri.
function hipsolverCpotriFortran(handle, uplo, n, A, lda, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverCpotriFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverCpotri(handle, uplo, n, A, lda, work, lwork, info)
end function hipsolverCpotriFortran

! C-callable shim around hipsolverZpotri.
function hipsolverZpotriFortran(handle, uplo, n, A, lda, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverZpotriFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverZpotri(handle, uplo, n, A, lda, work, lwork, info)
end function hipsolverZpotriFortran

! ******************** POTRS ********************

! C-callable shim around hipsolverSpotrs_bufferSize.
function hipsolverSpotrs_bufferSizeFortran(handle, uplo, n, nrhs, A, lda, B, ldb, lwork) &
    result(res) bind(c, name = 'hipsolverSpotrs_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, nrhs, lda, ldb
    integer(c_int) :: res

    res = hipsolverSpotrs_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork)
end function hipsolverSpotrs_bufferSizeFortran

! C-callable shim around hipsolverDpotrs_bufferSize.
function hipsolverDpotrs_bufferSizeFortran(handle, uplo, n, nrhs, A, lda, B, ldb, lwork) &
    result(res) bind(c, name = 'hipsolverDpotrs_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, nrhs, lda, ldb
    integer(c_int) :: res

    res = hipsolverDpotrs_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork)
end function hipsolverDpotrs_bufferSizeFortran

! C-callable shim around hipsolverCpotrs_bufferSize.
function hipsolverCpotrs_bufferSizeFortran(handle, uplo, n, nrhs, A, lda, B, ldb, lwork) &
    result(res) bind(c, name = 'hipsolverCpotrs_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, nrhs, lda, ldb
    integer(c_int) :: res

    res = hipsolverCpotrs_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork)
end function hipsolverCpotrs_bufferSizeFortran

! C-callable shim around hipsolverZpotrs_bufferSize.
function hipsolverZpotrs_bufferSizeFortran(handle, uplo, n, nrhs, A, lda, B, ldb, lwork) &
    result(res) bind(c, name = 'hipsolverZpotrs_bufferSizeFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, lwork
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, nrhs, lda, ldb
    integer(c_int) :: res

    res = hipsolverZpotrs_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork)
end function hipsolverZpotrs_bufferSizeFortran

! C-callable shim around hipsolverSpotrs.
function hipsolverSpotrsFortran(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverSpotrsFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, nrhs, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverSpotrs(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info)
end function hipsolverSpotrsFortran

! C-callable shim around hipsolverDpotrs.
function hipsolverDpotrsFortran(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverDpotrsFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, nrhs, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverDpotrs(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info)
end function hipsolverDpotrsFortran

! C-callable shim around hipsolverCpotrs.
function hipsolverCpotrsFortran(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverCpotrsFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, nrhs, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverCpotrs(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info)
end function hipsolverCpotrsFortran

! C-callable shim around hipsolverZpotrs.
function hipsolverZpotrsFortran(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info) &
    result(res) bind(c, name = 'hipsolverZpotrsFortran')
    use iso_c_binding, only: c_int, c_ptr
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle, A, B, work, info
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n, nrhs, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverZpotrs(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info)
end function hipsolverZpotrsFortran
! ******************** POTRS_BATCHED ********************

! C-callable shim: forwards all arguments to hipsolverSpotrsBatched_bufferSize and returns its result.
function hipsolverSpotrsBatched_bufferSizeFortran( &
        handle, uplo, n, nrhs, A, lda, B, ldb, lwork, batch_count) &
        result(res) bind(c, name = 'hipsolverSpotrsBatched_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, lwork
    integer(c_int), value :: n, nrhs, lda, ldb, batch_count
    integer(c_int) :: res

    res = hipsolverSpotrsBatched_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork, batch_count)
end function hipsolverSpotrsBatched_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverDpotrsBatched_bufferSize and returns its result.
function hipsolverDpotrsBatched_bufferSizeFortran( &
        handle, uplo, n, nrhs, A, lda, B, ldb, lwork, batch_count) &
        result(res) bind(c, name = 'hipsolverDpotrsBatched_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, lwork
    integer(c_int), value :: n, nrhs, lda, ldb, batch_count
    integer(c_int) :: res

    res = hipsolverDpotrsBatched_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork, batch_count)
end function hipsolverDpotrsBatched_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverCpotrsBatched_bufferSize and returns its result.
function hipsolverCpotrsBatched_bufferSizeFortran( &
        handle, uplo, n, nrhs, A, lda, B, ldb, lwork, batch_count) &
        result(res) bind(c, name = 'hipsolverCpotrsBatched_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, lwork
    integer(c_int), value :: n, nrhs, lda, ldb, batch_count
    integer(c_int) :: res

    res = hipsolverCpotrsBatched_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork, batch_count)
end function hipsolverCpotrsBatched_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverZpotrsBatched_bufferSize and returns its result.
function hipsolverZpotrsBatched_bufferSizeFortran( &
        handle, uplo, n, nrhs, A, lda, B, ldb, lwork, batch_count) &
        result(res) bind(c, name = 'hipsolverZpotrsBatched_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, lwork
    integer(c_int), value :: n, nrhs, lda, ldb, batch_count
    integer(c_int) :: res

    res = hipsolverZpotrsBatched_bufferSize(handle, uplo, n, nrhs, A, lda, B, ldb, lwork, batch_count)
end function hipsolverZpotrsBatched_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverSpotrsBatched and returns its result.
function hipsolverSpotrsBatchedFortran( &
        handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, batch_count) &
        result(res) bind(c, name = 'hipsolverSpotrsBatchedFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, work, info
    integer(c_int), value :: n, nrhs, lda, ldb, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverSpotrsBatched(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, batch_count)
end function hipsolverSpotrsBatchedFortran

! C-callable shim: forwards all arguments to hipsolverDpotrsBatched and returns its result.
function hipsolverDpotrsBatchedFortran( &
        handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, batch_count) &
        result(res) bind(c, name = 'hipsolverDpotrsBatchedFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, work, info
    integer(c_int), value :: n, nrhs, lda, ldb, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverDpotrsBatched(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, batch_count)
end function hipsolverDpotrsBatchedFortran

! C-callable shim: forwards all arguments to hipsolverCpotrsBatched and returns its result.
function hipsolverCpotrsBatchedFortran( &
        handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, batch_count) &
        result(res) bind(c, name = 'hipsolverCpotrsBatchedFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, work, info
    integer(c_int), value :: n, nrhs, lda, ldb, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverCpotrsBatched(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, batch_count)
end function hipsolverCpotrsBatchedFortran

! C-callable shim: forwards all arguments to hipsolverZpotrsBatched and returns its result.
function hipsolverZpotrsBatchedFortran( &
        handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, batch_count) &
        result(res) bind(c, name = 'hipsolverZpotrsBatchedFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, work, info
    integer(c_int), value :: n, nrhs, lda, ldb, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverZpotrsBatched(handle, uplo, n, nrhs, A, lda, B, ldb, work, lwork, info, batch_count)
end function hipsolverZpotrsBatchedFortran

! ******************** SYEVD/HEEVD ********************

! C-callable shim: forwards all arguments to hipsolverSsyevd_bufferSize and returns its result.
function hipsolverSsyevd_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork) &
        result(res) bind(c, name = 'hipsolverSsyevd_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverSsyevd_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork)
end function hipsolverSsyevd_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverDsyevd_bufferSize and returns its result.
function hipsolverDsyevd_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork) &
        result(res) bind(c, name = 'hipsolverDsyevd_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverDsyevd_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork)
end function hipsolverDsyevd_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverCheevd_bufferSize and returns its result.
function hipsolverCheevd_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork) &
        result(res) bind(c, name = 'hipsolverCheevd_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverCheevd_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork)
end function hipsolverCheevd_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverZheevd_bufferSize and returns its result.
function hipsolverZheevd_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork) &
        result(res) bind(c, name = 'hipsolverZheevd_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverZheevd_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork)
end function hipsolverZheevd_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverSsyevd and returns its result.
function hipsolverSsyevdFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverSsyevdFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverSsyevd(handle, jobz, uplo, n, A, lda, W, work, lwork, info)
end function hipsolverSsyevdFortran

! C-callable shim: forwards all arguments to hipsolverDsyevd and returns its result.
function hipsolverDsyevdFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverDsyevdFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverDsyevd(handle, jobz, uplo, n, A, lda, W, work, lwork, info)
end function hipsolverDsyevdFortran

! C-callable shim: forwards all arguments to hipsolverCheevd and returns its result.
function hipsolverCheevdFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverCheevdFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverCheevd(handle, jobz, uplo, n, A, lda, W, work, lwork, info)
end function hipsolverCheevdFortran

! C-callable shim: forwards all arguments to hipsolverZheevd and returns its result.
function hipsolverZheevdFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverZheevdFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverZheevd(handle, jobz, uplo, n, A, lda, W, work, lwork, info)
end function hipsolverZheevdFortran

!
! ******************** SYEVJ/HEEVJ ********************

! C-callable shim: forwards all arguments to hipsolverSsyevj_bufferSize and returns its result.
function hipsolverSsyevj_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork, params) &
        result(res) bind(c, name = 'hipsolverSsyevj_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork, params
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverSsyevj_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params)
end function hipsolverSsyevj_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverDsyevj_bufferSize and returns its result.
function hipsolverDsyevj_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork, params) &
        result(res) bind(c, name = 'hipsolverDsyevj_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork, params
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverDsyevj_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params)
end function hipsolverDsyevj_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverCheevj_bufferSize and returns its result.
function hipsolverCheevj_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork, params) &
        result(res) bind(c, name = 'hipsolverCheevj_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork, params
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverCheevj_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params)
end function hipsolverCheevj_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverZheevj_bufferSize and returns its result.
function hipsolverZheevj_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork, params) &
        result(res) bind(c, name = 'hipsolverZheevj_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork, params
    integer(c_int), value :: n, lda
    integer(c_int) :: res

    res = hipsolverZheevj_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params)
end function hipsolverZheevj_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverSsyevj and returns its result.
function hipsolverSsyevjFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info, params) &
        result(res) bind(c, name = 'hipsolverSsyevjFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info, params
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverSsyevj(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params)
end function hipsolverSsyevjFortran

! C-callable shim: forwards all arguments to hipsolverDsyevj and returns its result.
function hipsolverDsyevjFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info, params) &
        result(res) bind(c, name = 'hipsolverDsyevjFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info, params
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverDsyevj(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params)
end function hipsolverDsyevjFortran

! C-callable shim: forwards all arguments to hipsolverCheevj and returns its result.
function hipsolverCheevjFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info, params) &
        result(res) bind(c, name = 'hipsolverCheevjFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info, params
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverCheevj(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params)
end function hipsolverCheevjFortran

! C-callable shim: forwards all arguments to hipsolverZheevj and returns its result.
function hipsolverZheevjFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info, params) &
        result(res) bind(c, name = 'hipsolverZheevjFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info, params
    integer(c_int), value :: n, lda, lwork
    integer(c_int) :: res

    res = hipsolverZheevj(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params)
end function hipsolverZheevjFortran

!
! ******************** SYEVJ_BATCHED/HEEVJ_BATCHED ********************

! C-callable shim: forwards all arguments to hipsolverSsyevjBatched_bufferSize and returns its result.
function hipsolverSsyevjBatched_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork, params, batch_count) &
        result(res) bind(c, name = 'hipsolverSsyevjBatched_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork, params
    integer(c_int), value :: n, lda, batch_count
    integer(c_int) :: res

    res = hipsolverSsyevjBatched_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params, batch_count)
end function hipsolverSsyevjBatched_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverDsyevjBatched_bufferSize and returns its result.
function hipsolverDsyevjBatched_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork, params, batch_count) &
        result(res) bind(c, name = 'hipsolverDsyevjBatched_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork, params
    integer(c_int), value :: n, lda, batch_count
    integer(c_int) :: res

    res = hipsolverDsyevjBatched_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params, batch_count)
end function hipsolverDsyevjBatched_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverCheevjBatched_bufferSize and returns its result.
function hipsolverCheevjBatched_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork, params, batch_count) &
        result(res) bind(c, name = 'hipsolverCheevjBatched_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork, params
    integer(c_int), value :: n, lda, batch_count
    integer(c_int) :: res

    res = hipsolverCheevjBatched_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params, batch_count)
end function hipsolverCheevjBatched_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverZheevjBatched_bufferSize and returns its result.
function hipsolverZheevjBatched_bufferSizeFortran( &
        handle, jobz, uplo, n, A, lda, W, lwork, params, batch_count) &
        result(res) bind(c, name = 'hipsolverZheevjBatched_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, lwork, params
    integer(c_int), value :: n, lda, batch_count
    integer(c_int) :: res

    res = hipsolverZheevjBatched_bufferSize(handle, jobz, uplo, n, A, lda, W, lwork, params, batch_count)
end function hipsolverZheevjBatched_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverSsyevjBatched and returns its result.
function hipsolverSsyevjBatchedFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, batch_count) &
        result(res) bind(c, name = 'hipsolverSsyevjBatchedFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info, params
    integer(c_int), value :: n, lda, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverSsyevjBatched(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, batch_count)
end function hipsolverSsyevjBatchedFortran

! C-callable shim: forwards all arguments to hipsolverDsyevjBatched and returns its result.
function hipsolverDsyevjBatchedFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, batch_count) &
        result(res) bind(c, name = 'hipsolverDsyevjBatchedFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info, params
    integer(c_int), value :: n, lda, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverDsyevjBatched(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, batch_count)
end function hipsolverDsyevjBatchedFortran

! C-callable shim: forwards all arguments to hipsolverCheevjBatched and returns its result.
function hipsolverCheevjBatchedFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, batch_count) &
        result(res) bind(c, name = 'hipsolverCheevjBatchedFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info, params
    integer(c_int), value :: n, lda, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverCheevjBatched(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, batch_count)
end function hipsolverCheevjBatchedFortran

! C-callable shim: forwards all arguments to hipsolverZheevjBatched and returns its result.
function hipsolverZheevjBatchedFortran( &
        handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, batch_count) &
        result(res) bind(c, name = 'hipsolverZheevjBatchedFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, W, work, info, params
    integer(c_int), value :: n, lda, lwork, batch_count
    integer(c_int) :: res

    res = hipsolverZheevjBatched(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, batch_count)
end function hipsolverZheevjBatchedFortran

! ******************** SYGVD/HEGVD ********************

! C-callable shim: forwards all arguments to hipsolverSsygvd_bufferSize and returns its result.
function hipsolverSsygvd_bufferSizeFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork) &
        result(res) bind(c, name = 'hipsolverSsygvd_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, lwork
    integer(c_int), value :: n, lda, ldb
    integer(c_int) :: res

    res = hipsolverSsygvd_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork)
end function hipsolverSsygvd_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverDsygvd_bufferSize and returns its result.
function hipsolverDsygvd_bufferSizeFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork) &
        result(res) bind(c, name = 'hipsolverDsygvd_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, lwork
    integer(c_int), value :: n, lda, ldb
    integer(c_int) :: res

    res = hipsolverDsygvd_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork)
end function hipsolverDsygvd_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverChegvd_bufferSize and returns its result.
function hipsolverChegvd_bufferSizeFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork) &
        result(res) bind(c, name = 'hipsolverChegvd_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, lwork
    integer(c_int), value :: n, lda, ldb
    integer(c_int) :: res

    res = hipsolverChegvd_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork)
end function hipsolverChegvd_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverZhegvd_bufferSize and returns its result.
function hipsolverZhegvd_bufferSizeFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork) &
        result(res) bind(c, name = 'hipsolverZhegvd_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, lwork
    integer(c_int), value :: n, lda, ldb
    integer(c_int) :: res

    res = hipsolverZhegvd_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork)
end function hipsolverZhegvd_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverSsygvd and returns its result.
function hipsolverSsygvdFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverSsygvdFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, work, info
    integer(c_int), value :: n, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverSsygvd(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info)
end function hipsolverSsygvdFortran

! C-callable shim: forwards all arguments to hipsolverDsygvd and returns its result.
function hipsolverDsygvdFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverDsygvdFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, work, info
    integer(c_int), value :: n, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverDsygvd(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info)
end function hipsolverDsygvdFortran

! C-callable shim: forwards all arguments to hipsolverChegvd and returns its result.
function hipsolverChegvdFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverChegvdFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, work, info
    integer(c_int), value :: n, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverChegvd(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info)
end function hipsolverChegvdFortran

! C-callable shim: forwards all arguments to hipsolverZhegvd and returns its result.
function hipsolverZhegvdFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info) &
        result(res) bind(c, name = 'hipsolverZhegvdFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, work, info
    integer(c_int), value :: n, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverZhegvd(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info)
end function hipsolverZhegvdFortran

! ******************** SYGVJ/HEGVJ ********************

! C-callable shim: forwards all arguments to hipsolverSsygvj_bufferSize and returns its result.
function hipsolverSsygvj_bufferSizeFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params) &
        result(res) bind(c, name = 'hipsolverSsygvj_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, lwork, params
    integer(c_int), value :: n, lda, ldb
    integer(c_int) :: res

    res = hipsolverSsygvj_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params)
end function hipsolverSsygvj_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverDsygvj_bufferSize and returns its result.
function hipsolverDsygvj_bufferSizeFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params) &
        result(res) bind(c, name = 'hipsolverDsygvj_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, lwork, params
    integer(c_int), value :: n, lda, ldb
    integer(c_int) :: res

    res = hipsolverDsygvj_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params)
end function hipsolverDsygvj_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverChegvj_bufferSize and returns its result.
function hipsolverChegvj_bufferSizeFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params) &
        result(res) bind(c, name = 'hipsolverChegvj_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, lwork, params
    integer(c_int), value :: n, lda, ldb
    integer(c_int) :: res

    res = hipsolverChegvj_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params)
end function hipsolverChegvj_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverZhegvj_bufferSize and returns its result.
function hipsolverZhegvj_bufferSizeFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params) &
        result(res) bind(c, name = 'hipsolverZhegvj_bufferSizeFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, lwork, params
    integer(c_int), value :: n, lda, ldb
    integer(c_int) :: res

    res = hipsolverZhegvj_bufferSize(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, params)
end function hipsolverZhegvj_bufferSizeFortran

! C-callable shim: forwards all arguments to hipsolverSsygvj and returns its result.
function hipsolverSsygvjFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params) &
        result(res) bind(c, name = 'hipsolverSsygvjFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, work, info, params
    integer(c_int), value :: n, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverSsygvj(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params)
end function hipsolverSsygvjFortran

! C-callable shim: forwards all arguments to hipsolverDsygvj and returns its result.
function hipsolverDsygvjFortran( &
        handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params) &
        result(res) bind(c, name = 'hipsolverDsygvjFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    type(c_ptr), value :: handle, A, B, W, work, info, params
    integer(c_int), value :: n, lda, ldb, lwork
    integer(c_int) :: res

    res = hipsolverDsygvj(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params)
end function hipsolverDsygvjFortran

function hipsolverChegvjFortran(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params) &
        result(res) &
        bind(c, name = 'hipsolverChegvjFortran')
    use iso_c_binding
    use hipsolver_enums
    implicit none
    type(c_ptr), value :: handle
    integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype
    integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz
    integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
    integer(c_int), value :: n
    type(c_ptr), value :: A
    integer(c_int), value ::
lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: W type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int) :: res res = hipsolverChegvj(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params) end function hipsolverChegvjFortran function hipsolverZhegvjFortran(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params) & result(res) & bind(c, name = 'hipsolverZhegvjFortran') use iso_c_binding use hipsolver_enums implicit none type(c_ptr), value :: handle integer(kind(HIPSOLVER_EIG_TYPE_1)), value :: itype integer(kind(HIPSOLVER_EIG_MODE_NOVECTOR)), value :: jobz integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo integer(c_int), value :: n type(c_ptr), value :: A integer(c_int), value :: lda type(c_ptr), value :: B integer(c_int), value :: ldb type(c_ptr), value :: W type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info type(c_ptr), value :: params integer(c_int) :: res res = hipsolverZhegvj(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, info, params) end function hipsolverZhegvjFortran ! 
******************** SYTRD/HETRD ********************
    ! Every function in this section is exported under the C name given in its
    ! bind(c) clause. Each one hands its dummy arguments, unchanged and in the
    ! same order, to the hipsolver routine named like itself minus the trailing
    ! "Fortran", and returns that routine's integer result in res.

    ! Forwards to hipsolverSsytrd_bufferSize.
    function hipsolverSsytrd_bufferSizeFortran(handle, uplo, n, A, lda, &
            D, E, tau, lwork) &
            result(res) &
            bind(c, name = 'hipsolverSsytrd_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tau, lwork
        integer(c_int), value :: n, lda
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverSsytrd_bufferSize(handle, uplo, n, A, lda, &
            D, E, tau, lwork)
    end function hipsolverSsytrd_bufferSizeFortran

    ! Forwards to hipsolverDsytrd_bufferSize.
    function hipsolverDsytrd_bufferSizeFortran(handle, uplo, n, A, lda, &
            D, E, tau, lwork) &
            result(res) &
            bind(c, name = 'hipsolverDsytrd_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tau, lwork
        integer(c_int), value :: n, lda
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverDsytrd_bufferSize(handle, uplo, n, A, lda, &
            D, E, tau, lwork)
    end function hipsolverDsytrd_bufferSizeFortran

    ! Forwards to hipsolverChetrd_bufferSize.
    function hipsolverChetrd_bufferSizeFortran(handle, uplo, n, A, lda, &
            D, E, tau, lwork) &
            result(res) &
            bind(c, name = 'hipsolverChetrd_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tau, lwork
        integer(c_int), value :: n, lda
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverChetrd_bufferSize(handle, uplo, n, A, lda, &
            D, E, tau, lwork)
    end function hipsolverChetrd_bufferSizeFortran

    ! Forwards to hipsolverZhetrd_bufferSize.
    function hipsolverZhetrd_bufferSizeFortran(handle, uplo, n, A, lda, &
            D, E, tau, lwork) &
            result(res) &
            bind(c, name = 'hipsolverZhetrd_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tau, lwork
        integer(c_int), value :: n, lda
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverZhetrd_bufferSize(handle, uplo, n, A, lda, &
            D, E, tau, lwork)
    end function hipsolverZhetrd_bufferSizeFortran

    ! Forwards to hipsolverSsytrd.
    function hipsolverSsytrdFortran(handle, uplo, n, A, lda, &
            D, E, tau, work, lwork, info) &
            result(res) &
            bind(c, name = 'hipsolverSsytrdFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tau, work, info
        integer(c_int), value :: n, lda, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverSsytrd(handle, uplo, n, A, lda, &
            D, E, tau, work, lwork, info)
    end function hipsolverSsytrdFortran

    ! Forwards to hipsolverDsytrd.
    function hipsolverDsytrdFortran(handle, uplo, n, A, lda, &
            D, E, tau, work, lwork, info) &
            result(res) &
            bind(c, name = 'hipsolverDsytrdFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tau, work, info
        integer(c_int), value :: n, lda, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverDsytrd(handle, uplo, n, A, lda, &
            D, E, tau, work, lwork, info)
    end function hipsolverDsytrdFortran

    ! Forwards to hipsolverChetrd.
    function hipsolverChetrdFortran(handle, uplo, n, A, lda, &
            D, E, tau, work, lwork, info) &
            result(res) &
            bind(c, name = 'hipsolverChetrdFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tau, work, info
        integer(c_int), value :: n, lda, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverChetrd(handle, uplo, n, A, lda, &
            D, E, tau, work, lwork, info)
    end function hipsolverChetrdFortran

    ! Forwards to hipsolverZhetrd.
    function hipsolverZhetrdFortran(handle, uplo, n, A, lda, &
            D, E, tau, work, lwork, info) &
            result(res) &
            bind(c, name = 'hipsolverZhetrdFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, D, E, tau, work, info
        integer(c_int), value :: n, lda, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverZhetrd(handle, uplo, n, A, lda, &
            D, E, tau, work, lwork, info)
    end function hipsolverZhetrdFortran

    !
******************** SYTRF ********************
    ! Every function in this section is exported under the C name given in its
    ! bind(c) clause. Each one hands its dummy arguments, unchanged and in the
    ! same order, to the hipsolver routine named like itself minus the trailing
    ! "Fortran", and returns that routine's integer result in res.

    ! Forwards to hipsolverSsytrf_bufferSize.
    function hipsolverSsytrf_bufferSizeFortran(handle, n, A, lda, lwork) &
            result(res) &
            bind(c, name = 'hipsolverSsytrf_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, lwork
        integer(c_int), value :: n, lda
        integer(c_int) :: res

        res = hipsolverSsytrf_bufferSize(handle, n, A, lda, lwork)
    end function hipsolverSsytrf_bufferSizeFortran

    ! Forwards to hipsolverDsytrf_bufferSize.
    function hipsolverDsytrf_bufferSizeFortran(handle, n, A, lda, lwork) &
            result(res) &
            bind(c, name = 'hipsolverDsytrf_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, lwork
        integer(c_int), value :: n, lda
        integer(c_int) :: res

        res = hipsolverDsytrf_bufferSize(handle, n, A, lda, lwork)
    end function hipsolverDsytrf_bufferSizeFortran

    ! Forwards to hipsolverCsytrf_bufferSize.
    function hipsolverCsytrf_bufferSizeFortran(handle, n, A, lda, lwork) &
            result(res) &
            bind(c, name = 'hipsolverCsytrf_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, lwork
        integer(c_int), value :: n, lda
        integer(c_int) :: res

        res = hipsolverCsytrf_bufferSize(handle, n, A, lda, lwork)
    end function hipsolverCsytrf_bufferSizeFortran

    ! Forwards to hipsolverZsytrf_bufferSize.
    function hipsolverZsytrf_bufferSizeFortran(handle, n, A, lda, lwork) &
            result(res) &
            bind(c, name = 'hipsolverZsytrf_bufferSizeFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, lwork
        integer(c_int), value :: n, lda
        integer(c_int) :: res

        res = hipsolverZsytrf_bufferSize(handle, n, A, lda, lwork)
    end function hipsolverZsytrf_bufferSizeFortran

    ! Forwards to hipsolverSsytrf.
    function hipsolverSsytrfFortran(handle, uplo, n, A, lda, &
            ipiv, work, lwork, info) &
            result(res) &
            bind(c, name = 'hipsolverSsytrfFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, ipiv, work, info
        integer(c_int), value :: n, lda, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverSsytrf(handle, uplo, n, A, lda, &
            ipiv, work, lwork, info)
    end function hipsolverSsytrfFortran

    ! Forwards to hipsolverDsytrf.
    function hipsolverDsytrfFortran(handle, uplo, n, A, lda, &
            ipiv, work, lwork, info) &
            result(res) &
            bind(c, name = 'hipsolverDsytrfFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, ipiv, work, info
        integer(c_int), value :: n, lda, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverDsytrf(handle, uplo, n, A, lda, &
            ipiv, work, lwork, info)
    end function hipsolverDsytrfFortran

    ! Forwards to hipsolverCsytrf.
    function hipsolverCsytrfFortran(handle, uplo, n, A, lda, &
            ipiv, work, lwork, info) &
            result(res) &
            bind(c, name = 'hipsolverCsytrfFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle, A, ipiv, work, info
        integer(c_int), value :: n, lda, lwork
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int) :: res

        res = hipsolverCsytrf(handle, uplo, n, A, lda, &
            ipiv, work, lwork, info)
    end function hipsolverCsytrfFortran

    function hipsolverZsytrfFortran(handle, uplo, n, A, lda, ipiv, work, lwork, info) &
            result(res) &
            bind(c, name = 'hipsolverZsytrfFortran')
        use iso_c_binding
        use hipsolver_enums
        implicit none
        type(c_ptr), value :: handle
        integer(kind(HIPSOLVER_FILL_MODE_LOWER)), value :: uplo
        integer(c_int), value :: n
        type(c_ptr),
value :: A integer(c_int), value :: lda type(c_ptr), value :: ipiv type(c_ptr), value :: work integer(c_int), value :: lwork type(c_ptr), value :: info integer(c_int) :: res res = hipsolverZsytrf(handle, uplo, n, A, lda, ipiv, work, lwork, info) end function hipsolverZsytrfFortran end module hipsolver_interface hipSOLVER-rocm-5.5.1/clients/include/hipsolver_fortran.hpp000066400000000000000000005412751436107207300235570ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "hipsolver.h" /*!\file * This file interfaces with our Fortran LAPACK interface. 
*/

/*
 * ============================================================================
 *     Fortran functions
 * ============================================================================
 */

// Declarations inside this block have C linkage.
extern "C" {

/* ==========
 *   LAPACK
 * ========== */

// orgbr/ungbr
// S/D/C/Z prototypes that differ only in the element type of A and tau
// (float, double, hipFloatComplex, hipDoubleComplex). The *_bufferSize forms
// end in an `int* lwork` parameter; the remaining forms instead take a `work`
// array of the same element type, an `int lwork`, and an `int* devInfo`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSorgbr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k,
    float* A, int lda, float* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDorgbr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k,
    double* A, int lda, double* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCungbr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZungbr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSorgbrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k,
    float* A, int lda, float* tau, float* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDorgbrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k,
    double* A, int lda, double* tau, double* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCungbrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, hipFloatComplex* work,
    int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZungbrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, int m, int n, int k,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, hipDoubleComplex* work,
    int lwork, int* devInfo);

// orgqr/ungqr
// Same S/D/C/Z layout as above, but these prototypes take no `side` argument.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSorgqr_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, int k, float* A, int lda, float* tau, int* lwork);

// orgqr/ungqr (continued): remaining *_bufferSize prototypes, which end in
// `int* lwork`, followed by the forms that take `work`, `int lwork` and
// `int* devInfo`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDorgqr_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, int k, double* A, int lda, double* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCungqr_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, int k,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZungqr_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, int k,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSorgqrFortran(
    hipsolverHandle_t handle, int m, int n, int k,
    float* A, int lda, float* tau, float* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDorgqrFortran(
    hipsolverHandle_t handle, int m, int n, int k,
    double* A, int lda, double* tau, double* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCungqrFortran(
    hipsolverHandle_t handle, int m, int n, int k,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, hipFloatComplex* work,
    int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZungqrFortran(
    hipsolverHandle_t handle, int m, int n, int k,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, hipDoubleComplex* work,
    int lwork, int* devInfo);

// orgtr/ungtr
// S/D/C/Z prototypes taking a `hipsolverFillMode_t uplo` and a single
// dimension `n`; the *_bufferSize forms end in `int* lwork`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSorgtr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    float* A, int lda, float* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDorgtr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    double* A, int lda, double* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCungtr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t
hipsolverZungtr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, int* lwork);

// orgtr/ungtr (continued): forms that take a `work` array of the matrix
// element type, an `int lwork`, and an `int* devInfo`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSorgtrFortran(
    hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    float* A, int lda, float* tau, float* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDorgtrFortran(
    hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    double* A, int lda, double* tau, double* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCungtrFortran(
    hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, hipFloatComplex* work,
    int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZungtrFortran(
    hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, hipDoubleComplex* work,
    int lwork, int* devInfo);

// ormqr/unmqr
// S/D/C/Z *_bufferSize prototypes taking `side` and `trans` selectors, the
// dimensions m, n, k, the arrays A (with lda), tau and C (with ldc), and a
// trailing `int* lwork`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSormqr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans,
    int m, int n, int k,
    float* A, int lda, float* tau, float* C, int ldc, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDormqr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans,
    int m, int n, int k,
    double* A, int lda, double* tau, double* C, int ldc, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCunmqr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans,
    int m, int n, int k,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, hipFloatComplex* C, int ldc,
    int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZunmqr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans,
    int m, int n, int k,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, hipDoubleComplex* C, int ldc,
    int* lwork);
// ormqr/unmqr (continued): S/D/C/Z forms that take, after C and ldc, a `work`
// array of the matrix element type, an `int lwork`, and an `int* devInfo`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSormqrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans,
    int m, int n, int k,
    float* A, int lda, float* tau, float* C, int ldc,
    float* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDormqrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans,
    int m, int n, int k,
    double* A, int lda, double* tau, double* C, int ldc,
    double* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCunmqrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans,
    int m, int n, int k,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, hipFloatComplex* C, int ldc,
    hipFloatComplex* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZunmqrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverOperation_t trans,
    int m, int n, int k,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, hipDoubleComplex* C, int ldc,
    hipDoubleComplex* work, int lwork, int* devInfo);

// ormtr/unmtr
// S/D/C/Z *_bufferSize prototypes taking `side`, `uplo` and `trans`
// selectors, the dimensions m and n, the arrays A (with lda), tau and
// C (with ldc), and a trailing `int* lwork`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSormtr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo,
    hipsolverOperation_t trans, int m, int n,
    float* A, int lda, float* tau, float* C, int ldc, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDormtr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo,
    hipsolverOperation_t trans, int m, int n,
    double* A, int lda, double* tau, double* C, int ldc, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCunmtr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo,
    hipsolverOperation_t trans, int m, int n,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, hipFloatComplex* C, int ldc,
    int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZunmtr_bufferSizeFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t
    side, hipsolverFillMode_t uplo,
    hipsolverOperation_t trans, int m, int n,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, hipDoubleComplex* C, int ldc,
    int* lwork);

// ormtr/unmtr (continued): S/D/C/Z forms that take, after C and ldc, a `work`
// array of the matrix element type, an `int lwork`, and an `int* devInfo`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSormtrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo,
    hipsolverOperation_t trans, int m, int n,
    float* A, int lda, float* tau, float* C, int ldc,
    float* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDormtrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo,
    hipsolverOperation_t trans, int m, int n,
    double* A, int lda, double* tau, double* C, int ldc,
    double* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCunmtrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo,
    hipsolverOperation_t trans, int m, int n,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, hipFloatComplex* C, int ldc,
    hipFloatComplex* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZunmtrFortran(
    hipsolverHandle_t handle, hipsolverSideMode_t side, hipsolverFillMode_t uplo,
    hipsolverOperation_t trans, int m, int n,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, hipDoubleComplex* C, int ldc,
    hipDoubleComplex* work, int lwork, int* devInfo);

// gebrd
// The four *_bufferSize prototypes share one signature: handle, m, n and an
// `int* lwork`; no matrix pointer is passed.
HIPSOLVER_EXPORT hipsolverStatus_t
    hipsolverSgebrd_bufferSizeFortran(hipsolverHandle_t handle, int m, int n, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t
    hipsolverDgebrd_bufferSizeFortran(hipsolverHandle_t handle, int m, int n, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t
    hipsolverCgebrd_bufferSizeFortran(hipsolverHandle_t handle, int m, int n, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t
    hipsolverZgebrd_bufferSizeFortran(hipsolverHandle_t handle, int m, int n, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgebrdFortran(
    hipsolverHandle_t handle, int m, int n,
    float* A, int lda, float* D, float* E, float* tauq, float* taup, float* work,
    int lwork, int* devInfo);

// gebrd (continued): in the C and Z prototypes D and E keep the real type
// (float* / double*) while A, tauq, taup and work use the complex type.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgebrdFortran(
    hipsolverHandle_t handle, int m, int n,
    double* A, int lda, double* D, double* E, double* tauq, double* taup, double* work,
    int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgebrdFortran(
    hipsolverHandle_t handle, int m, int n,
    hipFloatComplex* A, int lda, float* D, float* E,
    hipFloatComplex* tauq, hipFloatComplex* taup, hipFloatComplex* work,
    int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgebrdFortran(
    hipsolverHandle_t handle, int m, int n,
    hipDoubleComplex* A, int lda, double* D, double* E,
    hipDoubleComplex* tauq, hipDoubleComplex* taup, hipDoubleComplex* work,
    int lwork, int* devInfo);

// gels
// Names carry a doubled type letter (SS/DD/CC/ZZ). Unlike the families above,
// the workspace size is a `size_t` (`size_t* lwork` in the *_bufferSize
// forms), `work` is an untyped `void*`, and the non-bufferSize forms take an
// additional `int* niters` before `int* devInfo`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSSgels_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, int nrhs,
    float* A, int lda, float* B, int ldb, float* X, int ldx, size_t* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDDgels_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, int nrhs,
    double* A, int lda, double* B, int ldb, double* X, int ldx, size_t* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCCgels_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, int nrhs,
    hipFloatComplex* A, int lda, hipFloatComplex* B, int ldb, hipFloatComplex* X, int ldx,
    size_t* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZZgels_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, int nrhs,
    hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, hipDoubleComplex* X, int ldx,
    size_t* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSSgelsFortran(
    hipsolverHandle_t handle, int m, int n, int nrhs,
    float* A, int lda, float* B, int ldb, float* X, int ldx,
    void* work, size_t lwork, int* niters, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDDgelsFortran(
    hipsolverHandle_t handle, int m, int n, int nrhs,
    double* A, int lda, double* B, int ldb, double* X, int ldx,
    void* work, size_t lwork, int* niters, int*
    devInfo);

// gels (continued): complex variants, with the same `void* work`,
// `size_t lwork`, `int* niters`, `int* devInfo` tail as the real ones.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCCgelsFortran(
    hipsolverHandle_t handle, int m, int n, int nrhs,
    hipFloatComplex* A, int lda, hipFloatComplex* B, int ldb, hipFloatComplex* X, int ldx,
    void* work, size_t lwork, int* niters, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZZgelsFortran(
    hipsolverHandle_t handle, int m, int n, int nrhs,
    hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, hipDoubleComplex* X, int ldx,
    void* work, size_t lwork, int* niters, int* devInfo);

// geqrf
// S/D/C/Z prototypes. The *_bufferSize forms take handle, m, n, A, lda and an
// `int* lwork`; the remaining forms add `tau`, a typed `work` array,
// `int lwork` and `int* devInfo`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgeqrf_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, float* A, int lda, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgeqrf_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, double* A, int lda, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgeqrf_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, hipFloatComplex* A, int lda, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgeqrf_bufferSizeFortran(
    hipsolverHandle_t handle, int m, int n, hipDoubleComplex* A, int lda, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgeqrfFortran(
    hipsolverHandle_t handle, int m, int n,
    float* A, int lda, float* tau, float* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgeqrfFortran(
    hipsolverHandle_t handle, int m, int n,
    double* A, int lda, double* tau, double* work, int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgeqrfFortran(
    hipsolverHandle_t handle, int m, int n,
    hipFloatComplex* A, int lda, hipFloatComplex* tau, hipFloatComplex* work,
    int lwork, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgeqrfFortran(
    hipsolverHandle_t handle, int m, int n,
    hipDoubleComplex* A, int lda, hipDoubleComplex* tau, hipDoubleComplex* work,
    int lwork, int* devInfo);

// gesv
// Doubled type letter (SS/DD/CC/ZZ) as for gels. These prototypes take n and
// nrhs (no m) and an `int* devIpiv` after A/lda; workspace size is `size_t`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSSgesv_bufferSizeFortran(
    hipsolverHandle_t handle, int n, int nrhs,
    float* A, int lda, int*
    devIpiv, float* B, int ldb, float* X, int ldx, size_t* lwork);

// gesv (continued): remaining *_bufferSize prototypes (ending in
// `size_t* lwork`), then the forms taking `void* work`, `size_t lwork`,
// `int* niters` and `int* devInfo`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDDgesv_bufferSizeFortran(
    hipsolverHandle_t handle, int n, int nrhs,
    double* A, int lda, int* devIpiv, double* B, int ldb, double* X, int ldx,
    size_t* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCCgesv_bufferSizeFortran(
    hipsolverHandle_t handle, int n, int nrhs,
    hipFloatComplex* A, int lda, int* devIpiv,
    hipFloatComplex* B, int ldb, hipFloatComplex* X, int ldx,
    size_t* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZZgesv_bufferSizeFortran(
    hipsolverHandle_t handle, int n, int nrhs,
    hipDoubleComplex* A, int lda, int* devIpiv,
    hipDoubleComplex* B, int ldb, hipDoubleComplex* X, int ldx,
    size_t* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSSgesvFortran(
    hipsolverHandle_t handle, int n, int nrhs,
    float* A, int lda, int* devIpiv, float* B, int ldb, float* X, int ldx,
    void* work, size_t lwork, int* niters, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDDgesvFortran(
    hipsolverHandle_t handle, int n, int nrhs,
    double* A, int lda, int* devIpiv, double* B, int ldb, double* X, int ldx,
    void* work, size_t lwork, int* niters, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCCgesvFortran(
    hipsolverHandle_t handle, int n, int nrhs,
    hipFloatComplex* A, int lda, int* devIpiv,
    hipFloatComplex* B, int ldb, hipFloatComplex* X, int ldx,
    void* work, size_t lwork, int* niters, int* devInfo);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZZgesvFortran(
    hipsolverHandle_t handle, int n, int nrhs,
    hipDoubleComplex* A, int lda, int* devIpiv,
    hipDoubleComplex* B, int ldb, hipDoubleComplex* X, int ldx,
    void* work, size_t lwork, int* niters, int* devInfo);

// gesvd
// The job selectors `jobu` and `jobv` are passed as `signed char`. The
// *_bufferSize prototypes take only handle, jobu, jobv, m, n and `int* lwork`.
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgesvd_bufferSizeFortran(
    hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, int* lwork);

HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgesvd_bufferSizeFortran(
    hipsolverHandle_t handle, signed char jobu, signed char jobv, int
m, int n, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgesvd_bufferSizeFortran( hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgesvd_bufferSizeFortran( hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgesvdFortran(hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, float* A, int lda, float* S, float* U, int ldu, float* V, int ldv, float* work, int lwork, float* rwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgesvdFortran(hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, double* A, int lda, double* S, double* U, int ldu, double* V, int ldv, double* work, int lwork, double* rwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgesvdFortran(hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, hipFloatComplex* A, int lda, float* S, hipFloatComplex* U, int ldu, hipFloatComplex* V, int ldv, hipFloatComplex* work, int lwork, float* rwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgesvdFortran(hipsolverHandle_t handle, signed char jobu, signed char jobv, int m, int n, hipDoubleComplex* A, int lda, double* S, hipDoubleComplex* U, int ldu, hipDoubleComplex* V, int ldv, hipDoubleComplex* work, int lwork, double* rwork, int* devInfo); // gesvdj HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgesvdj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ, int m, int n, const float* A, int lda, const float* S, const float* U, int ldu, const float* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgesvdj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ, int m, int n, const double* A, int lda, const double* S, const double* U, int ldu, const double* V, int ldv, int* lwork, 
hipsolverGesvdjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgesvdj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ, int m, int n, const hipFloatComplex* A, int lda, const float* S, const hipFloatComplex* U, int ldu, const hipFloatComplex* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgesvdj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ, int m, int n, const hipDoubleComplex* A, int lda, const double* S, const hipDoubleComplex* U, int ldu, const hipDoubleComplex* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgesvdjFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ, int m, int n, float* A, int lda, float* S, float* U, int ldu, float* V, int ldv, float* work, int lwork, int* devInfo, hipsolverGesvdjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgesvdjFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ, int m, int n, double* A, int lda, double* S, double* U, int ldu, double* V, int ldv, double* work, int lwork, int* devInfo, hipsolverGesvdjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgesvdjFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ, int m, int n, hipFloatComplex* A, int lda, float* S, hipFloatComplex* U, int ldu, hipFloatComplex* V, int ldv, hipFloatComplex* work, int lwork, int* devInfo, hipsolverGesvdjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgesvdjFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int econ, int m, int n, hipDoubleComplex* A, int lda, double* S, hipDoubleComplex* U, int ldu, hipDoubleComplex* V, int ldv, hipDoubleComplex* work, int lwork, int* devInfo, hipsolverGesvdjInfo_t params); // gesvdj_batched HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgesvdjBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int m, int n, const float* A, 
int lda, const float* S, const float* U, int ldu, const float* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgesvdjBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int m, int n, const double* A, int lda, const double* S, const double* U, int ldu, const double* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgesvdjBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int m, int n, const hipFloatComplex* A, int lda, const float* S, const hipFloatComplex* U, int ldu, const hipFloatComplex* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgesvdjBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int m, int n, const hipDoubleComplex* A, int lda, const double* S, const hipDoubleComplex* U, int ldu, const hipDoubleComplex* V, int ldv, int* lwork, hipsolverGesvdjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgesvdjBatchedFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int m, int n, float* A, int lda, float* S, float* U, int ldu, float* V, int ldv, float* work, int lwork, int* devInfo, hipsolverGesvdjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgesvdjBatchedFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int m, int n, double* A, int lda, double* S, double* U, int ldu, double* V, int ldv, double* work, int lwork, int* devInfo, hipsolverGesvdjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgesvdjBatchedFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int m, int n, hipFloatComplex* A, int lda, float* S, hipFloatComplex* U, int ldu, hipFloatComplex* V, int ldv, hipFloatComplex* work, int lwork, int* devInfo, hipsolverGesvdjInfo_t params, int batch_count); HIPSOLVER_EXPORT 
hipsolverStatus_t hipsolverZgesvdjBatchedFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int m, int n, hipDoubleComplex* A, int lda, double* S, hipDoubleComplex* U, int ldu, hipDoubleComplex* V, int ldv, hipDoubleComplex* work, int lwork, int* devInfo, hipsolverGesvdjInfo_t params, int batch_count); // getrf HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgetrf_bufferSizeFortran( hipsolverHandle_t handle, int m, int n, float* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgetrf_bufferSizeFortran( hipsolverHandle_t handle, int m, int n, double* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgetrf_bufferSizeFortran( hipsolverHandle_t handle, int m, int n, hipFloatComplex* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgetrf_bufferSizeFortran( hipsolverHandle_t handle, int m, int n, hipDoubleComplex* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgetrfFortran(hipsolverHandle_t handle, int m, int n, float* A, int lda, float* work, int lwork, int* devIpiv, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgetrfFortran(hipsolverHandle_t handle, int m, int n, double* A, int lda, double* work, int lwork, int* devIpiv, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgetrfFortran(hipsolverHandle_t handle, int m, int n, hipFloatComplex* A, int lda, hipFloatComplex* work, int lwork, int* devIpiv, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgetrfFortran(hipsolverHandle_t handle, int m, int n, hipDoubleComplex* A, int lda, hipDoubleComplex* work, int lwork, int* devIpiv, int* devInfo); // getrs HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgetrs_bufferSizeFortran(hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs, float* A, int lda, int* devIpiv, float* B, int ldb, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgetrs_bufferSizeFortran(hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs, double* A, 
int lda, int* devIpiv, double* B, int ldb, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgetrs_bufferSizeFortran(hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs, hipFloatComplex* A, int lda, int* devIpiv, hipFloatComplex* B, int ldb, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgetrs_bufferSizeFortran(hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs, hipDoubleComplex* A, int lda, int* devIpiv, hipDoubleComplex* B, int ldb, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSgetrsFortran(hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs, float* A, int lda, int* devIpiv, float* B, int ldb, float* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDgetrsFortran(hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs, double* A, int lda, int* devIpiv, double* B, int ldb, double* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCgetrsFortran(hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs, hipFloatComplex* A, int lda, int* devIpiv, hipFloatComplex* B, int ldb, hipFloatComplex* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZgetrsFortran(hipsolverHandle_t handle, hipsolverOperation_t trans, int n, int nrhs, hipDoubleComplex* A, int lda, int* devIpiv, hipDoubleComplex* B, int ldb, hipDoubleComplex* work, int lwork, int* devInfo); // potrf HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotrf_bufferSizeFortran( hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotrf_bufferSizeFortran( hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotrf_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t 
hipsolverZpotrf_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotrfFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, float* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotrfFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, double* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotrfFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, hipFloatComplex* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotrfFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, hipDoubleComplex* work, int lwork, int* devInfo); // potrf_batched HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotrfBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A[], int lda, int* lwork, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotrfBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A[], int lda, int* lwork, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotrfBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A[], int lda, int* lwork, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotrfBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A[], int lda, int* lwork, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotrfBatchedFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A[], int lda, float* work, int lwork, int* devInfo, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotrfBatchedFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* 
A[], int lda, double* work, int lwork, int* devInfo, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotrfBatchedFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A[], int lda, hipFloatComplex* work, int lwork, int* devInfo, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotrfBatchedFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A[], int lda, hipDoubleComplex* work, int lwork, int* devInfo, int batch_count); // potri HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotri_bufferSizeFortran( hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotri_bufferSizeFortran( hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotri_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotri_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotriFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, float* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotriFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, double* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotriFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, hipFloatComplex* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotriFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, hipDoubleComplex* work, int lwork, int* devInfo); // potrs HIPSOLVER_EXPORT hipsolverStatus_t 
hipsolverSpotrs_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, float* A, int lda, float* B, int ldb, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotrs_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, double* A, int lda, double* B, int ldb, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotrs_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipFloatComplex* A, int lda, hipFloatComplex* B, int ldb, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotrs_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotrsFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, float* A, int lda, float* B, int ldb, float* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotrsFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, double* A, int lda, double* B, int ldb, double* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotrsFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipFloatComplex* A, int lda, hipFloatComplex* B, int ldb, hipFloatComplex* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotrsFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, hipDoubleComplex* work, int lwork, int* devInfo); // potrs_batched HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotrsBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, float* A[], int lda, float* B[], int ldb, int* lwork, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotrsBatched_bufferSizeFortran(hipsolverHandle_t handle, 
hipsolverFillMode_t uplo, int n, int nrhs, double* A[], int lda, double* B[], int ldb, int* lwork, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotrsBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipFloatComplex* A[], int lda, hipFloatComplex* B[], int ldb, int* lwork, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotrsBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipDoubleComplex* A[], int lda, hipDoubleComplex* B[], int ldb, int* lwork, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSpotrsBatchedFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, float* A[], int lda, float* B[], int ldb, float* work, int lwork, int* devInfo, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDpotrsBatchedFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, double* A[], int lda, double* B[], int ldb, double* work, int lwork, int* devInfo, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCpotrsBatchedFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipFloatComplex* A[], int lda, hipFloatComplex* B[], int ldb, hipFloatComplex* work, int lwork, int* devInfo, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZpotrsBatchedFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, int nrhs, hipDoubleComplex* A[], int lda, hipDoubleComplex* B[], int ldb, hipDoubleComplex* work, int lwork, int* devInfo, int batch_count); // syevd/heevd HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsyevd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsyevd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, int* lwork); 
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCheevd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, float* W, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZheevd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, double* W, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsyevdFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, float* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsyevdFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, double* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCheevdFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, float* W, hipFloatComplex* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZheevdFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, double* W, hipDoubleComplex* work, int lwork, int* devInfo); // syevj/heevj HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsyevj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, int* lwork, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsyevj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, int* lwork, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCheevj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, float* W, int* lwork, hipsolverSyevjInfo_t params); 
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZheevj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, double* W, int* lwork, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsyevjFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, float* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsyevjFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, double* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCheevjFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, float* W, hipFloatComplex* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZheevjFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, double* W, hipDoubleComplex* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params); // syevj_batched/heevj_batched HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsyevjBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, int* lwork, hipsolverSyevjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsyevjBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, int* lwork, hipsolverSyevjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCheevjBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, float* W, int* lwork, hipsolverSyevjInfo_t 
params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZheevjBatched_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, double* W, int* lwork, hipsolverSyevjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsyevjBatchedFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* W, float* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsyevjBatchedFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* W, double* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCheevjBatchedFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, float* W, hipFloatComplex* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params, int batch_count); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZheevjBatchedFortran(hipsolverHandle_t handle, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, double* W, hipDoubleComplex* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params, int batch_count); // sygvd/hegvd HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsygvd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float* W, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsygvd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, double* W, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverChegvd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigType_t 
itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, hipFloatComplex* B, int ldb, float* W, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZhegvd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, double* W, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsygvdFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float* W, float* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsygvdFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, double* W, double* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverChegvdFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, hipFloatComplex* B, int ldb, float* W, hipFloatComplex* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZhegvdFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, double* W, hipDoubleComplex* work, int lwork, int* devInfo); // sygvj/hegvj HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsygvj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float* W, int* lwork, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsygvj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, 
double* W, int* lwork, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverChegvj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, hipFloatComplex* B, int ldb, float* W, int* lwork, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZhegvj_bufferSizeFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, double* W, int* lwork, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsygvjFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, float* A, int lda, float* B, int ldb, float* W, float* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsygvjFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, double* A, int lda, double* B, int ldb, double* W, double* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverChegvjFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, hipFloatComplex* B, int ldb, float* W, hipFloatComplex* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZhegvjFortran(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, double* W, hipDoubleComplex* work, int lwork, int* devInfo, hipsolverSyevjInfo_t params); // sytrd/hetrd HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsytrd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, 
float* D, float* E, float* tau, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsytrd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, double* D, double* E, double* tau, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverChetrd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, float* D, float* E, hipFloatComplex* tau, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZhetrd_bufferSizeFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, double* D, double* E, hipDoubleComplex* tau, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsytrdFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, float* D, float* E, float* tau, float* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsytrdFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, double* D, double* E, double* tau, double* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverChetrdFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, float* D, float* E, hipFloatComplex* tau, hipFloatComplex* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZhetrdFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, double* D, double* E, hipDoubleComplex* tau, hipDoubleComplex* work, int lwork, int* devInfo); // sytrf HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsytrf_bufferSizeFortran( hipsolverHandle_t handle, int n, float* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsytrf_bufferSizeFortran( hipsolverHandle_t handle, int n, double* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCsytrf_bufferSizeFortran( hipsolverHandle_t handle, int n, hipFloatComplex* A, int lda, int* lwork); 
HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZsytrf_bufferSizeFortran( hipsolverHandle_t handle, int n, hipDoubleComplex* A, int lda, int* lwork); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSsytrfFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, float* A, int lda, int* ipiv, float* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDsytrfFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, double* A, int lda, int* ipiv, double* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverCsytrfFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipFloatComplex* A, int lda, int* ipiv, hipFloatComplex* work, int lwork, int* devInfo); HIPSOLVER_EXPORT hipsolverStatus_t hipsolverZsytrfFortran(hipsolverHandle_t handle, hipsolverFillMode_t uplo, int n, hipDoubleComplex* A, int lda, int* ipiv, hipDoubleComplex* work, int lwork, int* devInfo); } hipSOLVER-rocm-5.5.1/clients/include/hipsolver_no_fortran.hpp000066400000000000000000000341541436107207300242440ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 * ************************************************************************ */

#pragma once

/*
 * ============================================================================
 *     Redirect Fortran API to C API
 * ============================================================================
 *
 * Every macro below maps a `...Fortran` identifier onto the identifier of the
 * same name without the suffix, so client code written against the
 * Fortran-suffixed names resolves to the plain C API entry points.
 * Each routine group lists the four `_bufferSize` aliases first, followed by
 * the four compute aliases (S, D, C, Z precisions).
 */

/* ==========
 *    LAPACK
 * ========== */

// orgbr/ungbr
#define hipsolverSorgbr_bufferSizeFortran hipsolverSorgbr_bufferSize
#define hipsolverDorgbr_bufferSizeFortran hipsolverDorgbr_bufferSize
#define hipsolverCungbr_bufferSizeFortran hipsolverCungbr_bufferSize
#define hipsolverZungbr_bufferSizeFortran hipsolverZungbr_bufferSize
#define hipsolverSorgbrFortran hipsolverSorgbr
#define hipsolverDorgbrFortran hipsolverDorgbr
#define hipsolverCungbrFortran hipsolverCungbr
#define hipsolverZungbrFortran hipsolverZungbr

// orgqr/ungqr
#define hipsolverSorgqr_bufferSizeFortran hipsolverSorgqr_bufferSize
#define hipsolverDorgqr_bufferSizeFortran hipsolverDorgqr_bufferSize
#define hipsolverCungqr_bufferSizeFortran hipsolverCungqr_bufferSize
#define hipsolverZungqr_bufferSizeFortran hipsolverZungqr_bufferSize
#define hipsolverSorgqrFortran hipsolverSorgqr
#define hipsolverDorgqrFortran hipsolverDorgqr
#define hipsolverCungqrFortran hipsolverCungqr
#define hipsolverZungqrFortran hipsolverZungqr

// orgtr/ungtr
#define hipsolverSorgtr_bufferSizeFortran hipsolverSorgtr_bufferSize
#define hipsolverDorgtr_bufferSizeFortran hipsolverDorgtr_bufferSize
#define hipsolverCungtr_bufferSizeFortran hipsolverCungtr_bufferSize
#define hipsolverZungtr_bufferSizeFortran hipsolverZungtr_bufferSize
#define hipsolverSorgtrFortran hipsolverSorgtr
#define hipsolverDorgtrFortran hipsolverDorgtr
#define hipsolverCungtrFortran hipsolverCungtr
#define hipsolverZungtrFortran hipsolverZungtr

// ormqr/unmqr
#define hipsolverSormqr_bufferSizeFortran hipsolverSormqr_bufferSize
#define hipsolverDormqr_bufferSizeFortran hipsolverDormqr_bufferSize
#define hipsolverCunmqr_bufferSizeFortran hipsolverCunmqr_bufferSize
#define hipsolverZunmqr_bufferSizeFortran hipsolverZunmqr_bufferSize
#define hipsolverSormqrFortran hipsolverSormqr
#define hipsolverDormqrFortran hipsolverDormqr
#define hipsolverCunmqrFortran hipsolverCunmqr
#define hipsolverZunmqrFortran hipsolverZunmqr

// ormtr/unmtr
#define hipsolverSormtr_bufferSizeFortran hipsolverSormtr_bufferSize
#define hipsolverDormtr_bufferSizeFortran hipsolverDormtr_bufferSize
#define hipsolverCunmtr_bufferSizeFortran hipsolverCunmtr_bufferSize
#define hipsolverZunmtr_bufferSizeFortran hipsolverZunmtr_bufferSize
#define hipsolverSormtrFortran hipsolverSormtr
#define hipsolverDormtrFortran hipsolverDormtr
#define hipsolverCunmtrFortran hipsolverCunmtr
#define hipsolverZunmtrFortran hipsolverZunmtr

// gebrd
#define hipsolverSgebrd_bufferSizeFortran hipsolverSgebrd_bufferSize
#define hipsolverDgebrd_bufferSizeFortran hipsolverDgebrd_bufferSize
#define hipsolverCgebrd_bufferSizeFortran hipsolverCgebrd_bufferSize
#define hipsolverZgebrd_bufferSizeFortran hipsolverZgebrd_bufferSize
#define hipsolverSgebrdFortran hipsolverSgebrd
#define hipsolverDgebrdFortran hipsolverDgebrd
#define hipsolverCgebrdFortran hipsolverCgebrd
#define hipsolverZgebrdFortran hipsolverZgebrd

// gels
#define hipsolverSSgels_bufferSizeFortran hipsolverSSgels_bufferSize
#define hipsolverDDgels_bufferSizeFortran hipsolverDDgels_bufferSize
#define hipsolverCCgels_bufferSizeFortran hipsolverCCgels_bufferSize
#define hipsolverZZgels_bufferSizeFortran hipsolverZZgels_bufferSize
#define hipsolverSSgelsFortran hipsolverSSgels
#define hipsolverDDgelsFortran hipsolverDDgels
#define hipsolverCCgelsFortran hipsolverCCgels
#define hipsolverZZgelsFortran hipsolverZZgels

// geqrf
#define hipsolverSgeqrf_bufferSizeFortran hipsolverSgeqrf_bufferSize
#define hipsolverDgeqrf_bufferSizeFortran hipsolverDgeqrf_bufferSize
#define hipsolverCgeqrf_bufferSizeFortran hipsolverCgeqrf_bufferSize
#define hipsolverZgeqrf_bufferSizeFortran hipsolverZgeqrf_bufferSize
#define hipsolverSgeqrfFortran hipsolverSgeqrf
#define hipsolverDgeqrfFortran hipsolverDgeqrf
#define hipsolverCgeqrfFortran hipsolverCgeqrf
#define hipsolverZgeqrfFortran hipsolverZgeqrf

// gesv
#define hipsolverSSgesv_bufferSizeFortran hipsolverSSgesv_bufferSize
#define hipsolverDDgesv_bufferSizeFortran hipsolverDDgesv_bufferSize
#define hipsolverCCgesv_bufferSizeFortran hipsolverCCgesv_bufferSize
#define hipsolverZZgesv_bufferSizeFortran hipsolverZZgesv_bufferSize
#define hipsolverSSgesvFortran hipsolverSSgesv
#define hipsolverDDgesvFortran hipsolverDDgesv
#define hipsolverCCgesvFortran hipsolverCCgesv
#define hipsolverZZgesvFortran hipsolverZZgesv

// gesvd
#define hipsolverSgesvd_bufferSizeFortran hipsolverSgesvd_bufferSize
#define hipsolverDgesvd_bufferSizeFortran hipsolverDgesvd_bufferSize
#define hipsolverCgesvd_bufferSizeFortran hipsolverCgesvd_bufferSize
#define hipsolverZgesvd_bufferSizeFortran hipsolverZgesvd_bufferSize
#define hipsolverSgesvdFortran hipsolverSgesvd
#define hipsolverDgesvdFortran hipsolverDgesvd
#define hipsolverCgesvdFortran hipsolverCgesvd
#define hipsolverZgesvdFortran hipsolverZgesvd

// gesvdj
#define hipsolverSgesvdj_bufferSizeFortran hipsolverSgesvdj_bufferSize
#define hipsolverDgesvdj_bufferSizeFortran hipsolverDgesvdj_bufferSize
#define hipsolverCgesvdj_bufferSizeFortran hipsolverCgesvdj_bufferSize
#define hipsolverZgesvdj_bufferSizeFortran hipsolverZgesvdj_bufferSize
#define hipsolverSgesvdjFortran hipsolverSgesvdj
#define hipsolverDgesvdjFortran hipsolverDgesvdj
#define hipsolverCgesvdjFortran hipsolverCgesvdj
#define hipsolverZgesvdjFortran hipsolverZgesvdj

// gesvdj_batched
#define hipsolverSgesvdjBatched_bufferSizeFortran hipsolverSgesvdjBatched_bufferSize
#define hipsolverDgesvdjBatched_bufferSizeFortran hipsolverDgesvdjBatched_bufferSize
#define hipsolverCgesvdjBatched_bufferSizeFortran hipsolverCgesvdjBatched_bufferSize
#define hipsolverZgesvdjBatched_bufferSizeFortran hipsolverZgesvdjBatched_bufferSize
#define hipsolverSgesvdjBatchedFortran hipsolverSgesvdjBatched
#define hipsolverDgesvdjBatchedFortran hipsolverDgesvdjBatched
#define hipsolverCgesvdjBatchedFortran hipsolverCgesvdjBatched
#define hipsolverZgesvdjBatchedFortran hipsolverZgesvdjBatched

// getrf
#define hipsolverSgetrf_bufferSizeFortran hipsolverSgetrf_bufferSize
#define hipsolverDgetrf_bufferSizeFortran hipsolverDgetrf_bufferSize
#define hipsolverCgetrf_bufferSizeFortran hipsolverCgetrf_bufferSize
#define hipsolverZgetrf_bufferSizeFortran hipsolverZgetrf_bufferSize
#define hipsolverSgetrfFortran hipsolverSgetrf
#define hipsolverDgetrfFortran hipsolverDgetrf
#define hipsolverCgetrfFortran hipsolverCgetrf
#define hipsolverZgetrfFortran hipsolverZgetrf

// getrs
#define hipsolverSgetrs_bufferSizeFortran hipsolverSgetrs_bufferSize
#define hipsolverDgetrs_bufferSizeFortran hipsolverDgetrs_bufferSize
#define hipsolverCgetrs_bufferSizeFortran hipsolverCgetrs_bufferSize
#define hipsolverZgetrs_bufferSizeFortran hipsolverZgetrs_bufferSize
#define hipsolverSgetrsFortran hipsolverSgetrs
#define hipsolverDgetrsFortran hipsolverDgetrs
#define hipsolverCgetrsFortran hipsolverCgetrs
#define hipsolverZgetrsFortran hipsolverZgetrs

// potrf
#define hipsolverSpotrf_bufferSizeFortran hipsolverSpotrf_bufferSize
#define hipsolverDpotrf_bufferSizeFortran hipsolverDpotrf_bufferSize
#define hipsolverCpotrf_bufferSizeFortran hipsolverCpotrf_bufferSize
#define hipsolverZpotrf_bufferSizeFortran hipsolverZpotrf_bufferSize
#define hipsolverSpotrfFortran hipsolverSpotrf
#define hipsolverDpotrfFortran hipsolverDpotrf
#define hipsolverCpotrfFortran hipsolverCpotrf
#define hipsolverZpotrfFortran hipsolverZpotrf

// potrf_batched
#define hipsolverSpotrfBatched_bufferSizeFortran hipsolverSpotrfBatched_bufferSize
#define hipsolverDpotrfBatched_bufferSizeFortran hipsolverDpotrfBatched_bufferSize
#define hipsolverCpotrfBatched_bufferSizeFortran hipsolverCpotrfBatched_bufferSize
#define hipsolverZpotrfBatched_bufferSizeFortran hipsolverZpotrfBatched_bufferSize
#define hipsolverSpotrfBatchedFortran hipsolverSpotrfBatched
#define hipsolverDpotrfBatchedFortran hipsolverDpotrfBatched
#define hipsolverCpotrfBatchedFortran hipsolverCpotrfBatched
#define hipsolverZpotrfBatchedFortran hipsolverZpotrfBatched

// potri
#define hipsolverSpotri_bufferSizeFortran hipsolverSpotri_bufferSize
#define hipsolverDpotri_bufferSizeFortran hipsolverDpotri_bufferSize
#define hipsolverCpotri_bufferSizeFortran hipsolverCpotri_bufferSize
#define hipsolverZpotri_bufferSizeFortran hipsolverZpotri_bufferSize
#define hipsolverSpotriFortran hipsolverSpotri
#define hipsolverDpotriFortran hipsolverDpotri
#define hipsolverCpotriFortran hipsolverCpotri
#define hipsolverZpotriFortran hipsolverZpotri

// potrs
#define hipsolverSpotrs_bufferSizeFortran hipsolverSpotrs_bufferSize
#define hipsolverDpotrs_bufferSizeFortran hipsolverDpotrs_bufferSize
#define hipsolverCpotrs_bufferSizeFortran hipsolverCpotrs_bufferSize
#define hipsolverZpotrs_bufferSizeFortran hipsolverZpotrs_bufferSize
#define hipsolverSpotrsFortran hipsolverSpotrs
#define hipsolverDpotrsFortran hipsolverDpotrs
#define hipsolverCpotrsFortran hipsolverCpotrs
#define hipsolverZpotrsFortran hipsolverZpotrs

// potrs_batched
#define hipsolverSpotrsBatched_bufferSizeFortran hipsolverSpotrsBatched_bufferSize
#define hipsolverDpotrsBatched_bufferSizeFortran hipsolverDpotrsBatched_bufferSize
#define hipsolverCpotrsBatched_bufferSizeFortran hipsolverCpotrsBatched_bufferSize
#define hipsolverZpotrsBatched_bufferSizeFortran hipsolverZpotrsBatched_bufferSize
#define hipsolverSpotrsBatchedFortran hipsolverSpotrsBatched
#define hipsolverDpotrsBatchedFortran hipsolverDpotrsBatched
#define hipsolverCpotrsBatchedFortran hipsolverCpotrsBatched
#define hipsolverZpotrsBatchedFortran hipsolverZpotrsBatched

// syevd/heevd
#define hipsolverSsyevd_bufferSizeFortran hipsolverSsyevd_bufferSize
#define hipsolverDsyevd_bufferSizeFortran hipsolverDsyevd_bufferSize
#define hipsolverCheevd_bufferSizeFortran hipsolverCheevd_bufferSize
#define hipsolverZheevd_bufferSizeFortran hipsolverZheevd_bufferSize
#define hipsolverSsyevdFortran hipsolverSsyevd
#define hipsolverDsyevdFortran hipsolverDsyevd
#define hipsolverCheevdFortran hipsolverCheevd
#define hipsolverZheevdFortran hipsolverZheevd

// syevj/heevj
#define hipsolverSsyevj_bufferSizeFortran hipsolverSsyevj_bufferSize
#define hipsolverDsyevj_bufferSizeFortran hipsolverDsyevj_bufferSize
#define hipsolverCheevj_bufferSizeFortran hipsolverCheevj_bufferSize
#define hipsolverZheevj_bufferSizeFortran hipsolverZheevj_bufferSize
#define hipsolverSsyevjFortran hipsolverSsyevj
#define hipsolverDsyevjFortran hipsolverDsyevj
#define hipsolverCheevjFortran hipsolverCheevj
#define hipsolverZheevjFortran hipsolverZheevj

// syevj_batched/heevj_batched
#define hipsolverSsyevjBatched_bufferSizeFortran hipsolverSsyevjBatched_bufferSize
#define hipsolverDsyevjBatched_bufferSizeFortran hipsolverDsyevjBatched_bufferSize
#define hipsolverCheevjBatched_bufferSizeFortran hipsolverCheevjBatched_bufferSize
#define hipsolverZheevjBatched_bufferSizeFortran hipsolverZheevjBatched_bufferSize
#define hipsolverSsyevjBatchedFortran hipsolverSsyevjBatched
#define hipsolverDsyevjBatchedFortran hipsolverDsyevjBatched
#define hipsolverCheevjBatchedFortran hipsolverCheevjBatched
#define hipsolverZheevjBatchedFortran hipsolverZheevjBatched

// sygvd/hegvd
#define hipsolverSsygvd_bufferSizeFortran hipsolverSsygvd_bufferSize
#define hipsolverDsygvd_bufferSizeFortran hipsolverDsygvd_bufferSize
#define hipsolverChegvd_bufferSizeFortran hipsolverChegvd_bufferSize
#define hipsolverZhegvd_bufferSizeFortran hipsolverZhegvd_bufferSize
#define hipsolverSsygvdFortran hipsolverSsygvd
#define hipsolverDsygvdFortran hipsolverDsygvd
#define hipsolverChegvdFortran hipsolverChegvd
#define hipsolverZhegvdFortran hipsolverZhegvd

// sygvj/hegvj
#define hipsolverSsygvj_bufferSizeFortran hipsolverSsygvj_bufferSize
#define hipsolverDsygvj_bufferSizeFortran hipsolverDsygvj_bufferSize
#define hipsolverChegvj_bufferSizeFortran hipsolverChegvj_bufferSize
#define hipsolverZhegvj_bufferSizeFortran hipsolverZhegvj_bufferSize
#define hipsolverSsygvjFortran hipsolverSsygvj
#define hipsolverDsygvjFortran hipsolverDsygvj
#define hipsolverChegvjFortran hipsolverChegvj
#define hipsolverZhegvjFortran hipsolverZhegvj

// sytrd/hetrd
#define hipsolverSsytrd_bufferSizeFortran hipsolverSsytrd_bufferSize
#define hipsolverDsytrd_bufferSizeFortran hipsolverDsytrd_bufferSize
#define hipsolverChetrd_bufferSizeFortran hipsolverChetrd_bufferSize
#define hipsolverZhetrd_bufferSizeFortran hipsolverZhetrd_bufferSize
#define hipsolverSsytrdFortran hipsolverSsytrd
#define hipsolverDsytrdFortran hipsolverDsytrd
#define hipsolverChetrdFortran hipsolverChetrd
#define hipsolverZhetrdFortran hipsolverZhetrd

// sytrf
#define hipsolverSsytrf_bufferSizeFortran hipsolverSsytrf_bufferSize
#define hipsolverDsytrf_bufferSizeFortran hipsolverDsytrf_bufferSize
#define hipsolverCsytrf_bufferSizeFortran hipsolverCsytrf_bufferSize
#define hipsolverZsytrf_bufferSizeFortran hipsolverZsytrf_bufferSize
#define hipsolverSsytrfFortran hipsolverSsytrf
#define hipsolverDsytrfFortran hipsolverDsytrf
#define hipsolverCsytrfFortran hipsolverCsytrf
#define hipsolverZsytrfFortran hipsolverZsytrf
hipSOLVER-rocm-5.5.1/clients/include/lapack_host_reference.hpp000066400000000000000000000321251436107207300243040ustar00rootroot00000000000000/* ************************************************************************
 * Copyright (C) 2020-2022 Advanced Micro Devices, Inc.
All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 * ************************************************************************ */

#pragma once

#include "hipsolver.h"
#include "hipsolver_datatype2string.hpp"

// Declarations of the host-side reference routines used by the client tests.
// Only declarations appear here; the definitions are not visible in this header.
//
// NOTE(review): in this copy every `template` keyword is followed directly by the return
// type, yet the signatures use the type names T, S and W. The template parameter lists
// appear to have been lost -- restore them from the upstream header before compiling.

// BLAS

// General matrix-matrix product wrapper (gemm).
template void cblas_gemm(hipsolverOperation_t transA, hipsolverOperation_t transB, int m, int n,
                         int k, T alpha, T* A, int lda, T* B, int ldb, T beta, T* C, int ldc);

// Symmetric / Hermitian matrix-matrix product wrapper (symm / hemm).
template void cblas_symm_hemm(hipsolverSideMode_t side, hipsolverFillMode_t uplo, int m, int n,
                              T alpha, T* A, int lda, T* B, int ldb, T beta, T* C, int ldc);

// Symmetric / Hermitian matrix-vector product wrapper (symv / hemv).
template void cblas_symv_hemv(hipsolverFillMode_t uplo, int n, T alpha, T* A, int lda, T* x,
                              int incx, T beta, T* y, int incy);

// Triangular matrix-matrix product wrapper (trmm). `diag` is passed as a plain char.
template void cblas_trmm(hipsolverSideMode_t side, hipsolverFillMode_t uplo,
                         hipsolverOperation_t transA, char diag, int m, int n, T alpha, T* A,
                         int lda, T* B, int ldb);

// Triangular solve wrapper (trsm); same parameter list as cblas_trmm.
template void cblas_trsm(hipsolverSideMode_t side, hipsolverFillMode_t uplo,
                         hipsolverOperation_t transA, char diag, int m, int n, T alpha, T* A,
                         int lda, T* B, int ldb);

// LAPACK

// Auxiliary routines: lacgv and larf.
template void cblas_lacgv(int n, T* x, int incx);

template void cblas_larf(hipsolverSideMode_t side, int m, int n, T* x, int incx, T* alpha, T* A,
                         int lda, T* work);

// Generation / application of the orthogonal or unitary factors (org*/ung*, orm*/unm*).
template void cblas_orgbr_ungbr(hipsolverSideMode_t side, int m, int n, int k, T* A, int lda,
                                T* Ipiv, T* work, int size_w, int* info);

template void cblas_orgqr_ungqr(int m, int n, int k, T* A, int lda, T* Ipiv, T* work, int sizeW,
                                int* info);

template void cblas_orgtr_ungtr(hipsolverFillMode_t uplo, int n, T* A, int lda, T* Ipiv, T* work,
                                int size_w, int* info);

template void cblas_ormqr_unmqr(hipsolverSideMode_t side, hipsolverOperation_t trans, int m,
                                int n, int k, T* A, int lda, T* Ipiv, T* C, int ldc, T* work,
                                int sizeW, int* info);

template void cblas_ormtr_unmtr(hipsolverSideMode_t side, hipsolverFillMode_t uplo,
                                hipsolverOperation_t trans, int m, int n, T* A, int lda, T* Ipiv,
                                T* C, int ldc, T* work, int sizeW, int* info);

// Bidiagonal reduction (gebrd). D and E use the separate type S, while A, tauq, taup and
// work use T.
template void cblas_gebrd(int m, int n, T* A, int lda, S* D, S* E, T* tauq, T* taup, T* work,
                          int size_w, int* info);

// Least-squares, QR and linear-system routines.
template void cblas_gels(hipsolverOperation_t transR, int m, int n, int nrhs, T* A, int lda,
                         T* B, int ldb, T* work, int lwork, int* info);

template void cblas_geqrf(int m, int n, T* A, int lda, T* ipiv, T* work, int sizeW, int* info);

template void cblas_gesv(int n, int nrhs, T* A, int lda, int* ipiv, T* B, int ldb, int* info);

// Singular value decomposition (gesvd / gesvdx). S, E, vl, vu and rwork use the type W.
template void cblas_gesvd(char leftv, char rightv, int m, int n, T* A, int lda, W* S, T* U,
                          int ldu, T* V, int ldv, T* work, int lwork, W* E, int* info);

template void cblas_gesvdx(hipsolverEigMode_t leftv, hipsolverEigMode_t rightv, char srange,
                           int m, int n, T* A, int lda, W vl, W vu, int il, int iu, int* nsv,
                           W* S, T* U, int ldu, T* V, int ldv, T* work, int lwork, W* rwork,
                           int* iwork, int* info);

// LU factorization and solve.
template void cblas_getrf(int m, int n, T* A, int lda, int* ipiv, int* info);

template void cblas_getrs(hipsolverOperation_t trans, int n, int nrhs, T* A, int lda, int* ipiv,
                          T* B, int ldb, int* info);

// Cholesky factorization, inverse and solve.
template void cblas_potrf(hipsolverFillMode_t uplo, int n, T* A, int lda, int* info);

template void cblas_potri(hipsolverFillMode_t uplo, int n, T* A, int lda, int* info);

template void cblas_potrs(hipsolverFillMode_t uplo, int n, int nrhs, T* A, int lda, T* B,
                          int ldb, int* info);

// Symmetric / Hermitian eigen-solvers (standard and generalized). W, rwork, vl, vu and
// abstol use the type S.
template void cblas_syevd_heevd(hipsolverEigMode_t evect, hipsolverFillMode_t uplo, int n, T* A,
                                int lda, S* W, T* work, int lwork, S* rwork, int lrwork,
                                int* iwork, int liwork, int* info);

template void cblas_syevx_heevx(hipsolverEigMode_t evect, hipsolverEigRange_t erange,
                                hipsolverFillMode_t uplo, int n, T* A, int lda, S vl, S vu,
                                int il, int iu, S abstol, int* nev, S* W, T* Z, int ldz, T* work,
                                int lwork, S* rwork, int* iwork, int* ifail, int* info);

template void cblas_sygvd_hegvd(hipsolverEigType_t itype, hipsolverEigMode_t evect,
                                hipsolverFillMode_t uplo, int n, T* A, int lda, T* B, int ldb,
                                S* W, T* work, int lwork, S* rwork, int lrwork, int* iwork,
                                int liwork, int* info);

template void cblas_sygvx_hegvx(hipsolverEigType_t itype, hipsolverEigMode_t evect,
                                hipsolverEigRange_t erange, hipsolverFillMode_t uplo, int n,
                                T* A, int lda, T* B, int ldb, S vl, S vu, int il, int iu,
                                S abstol, int* nev, S* W, T* Z, int ldz, T* work, int lwork,
                                S* rwork, int* iwork, int* ifail, int* info);

// Tridiagonal reduction (sytrd / hetrd). Unlike most routines above it takes no `info`
// argument.
template void cblas_sytrd_hetrd(hipsolverFillMode_t uplo, int n, T* A, int lda, S* D, S* E,
                                T* tau, T* work, int size_w);

// Symmetric indefinite factorization (sytrf).
template void cblas_sytrf(hipsolverFillMode_t uplo, int n, T* A, int lda, int* ipiv, T* work,
                          int lwork, int* info);
hipSOLVER-rocm-5.5.1/clients/include/testing_gebrd.hpp000066400000000000000000001314101436107207300226130ustar00rootroot00000000000000/* ************************************************************************
 * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
 *
 * ************************************************************************ */

#pragma once

#include "clientcommon.hpp"

// Client test harness for gebrd: bad-argument checks, input generation, an implicit
// correctness check and timing.
//
// NOTE(review): in this copy each `template` keyword is followed directly by the return type,
// and uses such as `std::vector hW(...)`, `argus.get("m")`, `is_complex` and
// `device_strided_batch_vector dA(...)` carry no template arguments. The angle-bracket lists
// appear to have been lost; names such as T, S, FORTRAN, BATCHED, STRIDED, CPU and GPU are
// therefore unbound in what is visible here. Restore them from upstream before compiling.

// Calls hipsolver_gebrd with one invalid argument at a time and checks the returned status.
//  - A null handle must yield HIPSOLVER_STATUS_NOT_INITIALIZED.
//  - Only when one of the __HIP_PLATFORM_* macros below is defined: a null dA, dD, dE, dTauq,
//    dTaup or dInfo must each yield HIPSOLVER_STATUS_INVALID_VALUE.
// dWork is never replaced by a null pointer in these checks.
template void gebrd_checkBadArgs(const hipsolverHandle_t handle, const int m, const int n, T dA,
                                 const int lda, const int stA, S dD, const int stD, S dE,
                                 const int stE, U dTauq, const int stQ, U dTaup, const int stP,
                                 U dWork, const int lwork, V dInfo, const int bc)
{
    // handle
    EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, nullptr, m, n, dA, lda, stA, dD, stD, dE, stE,
                                          dTauq, stQ, dTaup, stP, dWork, lwork, dInfo, bc),
                          HIPSOLVER_STATUS_NOT_INITIALIZED);

    // values
    // N/A

#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
    // pointers
    EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, handle, m, n, (T) nullptr, lda, stA, dD, stD,
                                          dE, stE, dTauq, stQ, dTaup, stP, dWork, lwork, dInfo,
                                          bc),
                          HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, handle, m, n, dA, lda, stA, (S) nullptr, stD,
                                          dE, stE, dTauq, stQ, dTaup, stP, dWork, lwork, dInfo,
                                          bc),
                          HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, handle, m, n, dA, lda, stA, dD, stD,
                                          (S) nullptr, stE, dTauq, stQ, dTaup, stP, dWork, lwork,
                                          dInfo, bc),
                          HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, handle, m, n, dA, lda, stA, dD, stD, dE, stE,
                                          (U) nullptr, stQ, dTaup, stP, dWork, lwork, dInfo, bc),
                          HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, handle, m, n, dA, lda, stA, dD, stD, dE, stE,
                                          dTauq, stQ, (U) nullptr, stP, dWork, lwork, dInfo, bc),
                          HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, handle, m, n, dA, lda, stA, dD, stD, dE, stE,
                                          dTauq, stQ, dTaup, stP, dWork, lwork, (V) nullptr, bc),
                          HIPSOLVER_STATUS_INVALID_VALUE);
#endif
}

// Entry point of the bad-argument test. Uses size 1 for every dimension, stride and the batch
// count, allocates the device buffers, queries the workspace size and then runs
// gebrd_checkBadArgs. The BATCHED branch contains only commented-out code, so it does nothing.
template void testing_gebrd_bad_arg()
{
    using S = decltype(std::real(T{}));

    // safe arguments
    hipsolver_local_handle handle;
    int m = 1;
    int n = 1;
    int lda = 1;
    int stA = 1;
    int stD = 1;
    int stE = 1;
    int stQ = 1;
    int stP = 1;
    int bc = 1;

    if(BATCHED)
    {
        // Disabled batched path, kept as comments:
        // // memory allocations
        // device_batch_vector dA(1, 1, 1);
        // device_strided_batch_vector dD(1, 1, 1, 1);
        // device_strided_batch_vector dE(1, 1, 1, 1);
        // device_strided_batch_vector dTauq(1, 1, 1, 1);
        // device_strided_batch_vector dTaup(1, 1, 1, 1);
        // device_strided_batch_vector dInfo(1, 1, 1, 1);
        // CHECK_HIP_ERROR(dA.memcheck());
        // CHECK_HIP_ERROR(dD.memcheck());
        // CHECK_HIP_ERROR(dE.memcheck());
        // CHECK_HIP_ERROR(dTauq.memcheck());
        // CHECK_HIP_ERROR(dTaup.memcheck());
        // CHECK_HIP_ERROR(dInfo.memcheck());

        // int size_W;
        // hipsolver_gebrd_bufferSize(FORTRAN, handle, m, n, dA.data(), lda, &size_W);
        // device_strided_batch_vector dWork(size_W, 1, size_W, bc);
        // if(size_W)
        //     CHECK_HIP_ERROR(dWork.memcheck());

        // // check bad arguments
        // gebrd_checkBadArgs(handle, m, n, dA.data(), lda, stA, dD.data(), stD, dE.data(), stE,
        //                    dTauq.data(), stQ, dTaup.data(), stP, dWork.data(), size_W,
        //                    dInfo.data(), bc);
    }
    else
    {
        // memory allocations
        device_strided_batch_vector dA(1, 1, 1, 1);
        device_strided_batch_vector dD(1, 1, 1, 1);
        device_strided_batch_vector dE(1, 1, 1, 1);
        device_strided_batch_vector dTauq(1, 1, 1, 1);
        device_strided_batch_vector dTaup(1, 1, 1, 1);
        device_strided_batch_vector dInfo(1, 1, 1, 1);
        CHECK_HIP_ERROR(dA.memcheck());
        CHECK_HIP_ERROR(dD.memcheck());
        CHECK_HIP_ERROR(dE.memcheck());
        CHECK_HIP_ERROR(dTauq.memcheck());
        CHECK_HIP_ERROR(dTaup.memcheck());
        CHECK_HIP_ERROR(dInfo.memcheck());

        // The workspace is only checked when the query reports a non-zero size.
        int size_W;
        hipsolver_gebrd_bufferSize(FORTRAN, handle, m, n, dA.data(), lda, &size_W);
        device_strided_batch_vector dWork(size_W, 1, size_W, bc);
        if(size_W)
            CHECK_HIP_ERROR(dWork.memcheck());

        // check bad arguments
        gebrd_checkBadArgs(handle, m, n, dA.data(), lda, stA, dD.data(), stD, dE.data(), stE,
                           dTauq.data(), stQ, dTaup.data(), stP, dWork.data(), size_W,
                           dInfo.data(), bc);
    }
}

// Fills the host matrices and, optionally, uploads them.
//  - If CPU: initializes hA with rocblas_init, then for every batch entry adds 400 to the
//    diagonal and to one adjacent diagonal (j == i + 1 when m >= n, i == j + 1 otherwise)
//    and subtracts 4 from every other element.
//  - If GPU: copies hA into dA.
// The remaining parameters (dD, dE, dTauq, dTaup, the strides, hD, hE, hTauq, hTaup) are not
// referenced in the body.
template void gebrd_initData(const hipsolverHandle_t handle, const int m, const int n, Td& dA,
                             const int lda, const int stA, Sd& dD, const int stD, Sd& dE,
                             const int stE, Ud& dTauq, const int stQ, Ud& dTaup, const int stP,
                             const int bc, Th& hA, Sh& hD, Sh& hE, Uh& hTauq, Uh& hTaup)
{
    if(CPU)
    {
        rocblas_init(hA, true);

        // scale A to avoid singularities
        for(int b = 0; b < bc; ++b)
        {
            for(int i = 0; i < m; i++)
            {
                for(int j = 0; j < n; j++)
                {
                    // element (i, j) is stored at i + j * lda
                    if(i == j || (m >= n && j == i + 1) || (m < n && i == j + 1))
                        hA[b][i + j * lda] += 400;
                    else
                        hA[b][i + j * lda] -= 4;
                }
            }
        }
    }

    if(GPU)
    {
        // now copy to the GPU
        CHECK_HIP_ERROR(dA.transfer_from(hA));
    }
}

// Runs gebrd and measures how well the factorization reproduces the input ("implicit test").
// Steps:
//  1. Initialize the input data.
//  2. Factorize on the GPU and copy A, tauq, taup and info back to the host. The host
//     cblas_gebrd path is compiled in only if VERIFY_IMPLICIT_TEST is changed to true.
//  3. Rebuild A in place in hARes by applying cblas_larf, column by column from the last to
//     the first, using the vectors stored in the factorized matrix.
//  4. *max_err = largest norm_error('F', ...) between hA and hARes over the batch, plus the
//     number of batch entries whose info value is non-zero.
template void gebrd_getError(const hipsolverHandle_t handle, const int m, const int n, Td& dA,
                             const int lda, const int stA, Sd& dD, const int stD, Sd& dE,
                             const int stE, Ud& dTauq, const int stQ, Ud& dTaup, const int stP,
                             Ud& dWork, const int lwork, Vd& dInfo, const int bc, Th& hA,
                             Th& hARes, Sh& hD, Sh& hE, Uh& hTauq, Uh& hTaup, Vh& hInfo,
                             Vh& hInfoRes, double* max_err)
{
    constexpr bool COMPLEX = is_complex;
    constexpr bool VERIFY_IMPLICIT_TEST = false;

    // host workspace, shared by cblas_gebrd and cblas_larf below
    std::vector hW(max(m, n));

    // input data initialization
    gebrd_initData(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup, stP, bc, hA,
                   hD, hE, hTauq, hTaup);

    // execute computations
    // use verify_implicit_test to check correctness of the implicit test using
    // CPU lapack
    if(!VERIFY_IMPLICIT_TEST)
    {
        // GPU lapack
        CHECK_ROCBLAS_ERROR(hipsolver_gebrd(FORTRAN, handle, m, n, dA.data(), lda, stA, dD.data(),
                                            stD, dE.data(), stE, dTauq.data(), stQ, dTaup.data(),
                                            stP, dWork.data(), lwork, dInfo.data(), bc));
        CHECK_HIP_ERROR(hARes.transfer_from(dA));
        CHECK_HIP_ERROR(hTauq.transfer_from(dTauq));
        CHECK_HIP_ERROR(hTaup.transfer_from(dTaup));
        CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo));
    }
    else
    {
        // CPU lapack
        for(int b = 0; b < bc; ++b)
        {
            memcpy(hARes[b], hA[b], lda * n * sizeof(T));
            cblas_gebrd(m, n, hARes[b], lda, hD[b], hE[b], hTauq[b], hTaup[b], hW.data(),
                        max(m, n), hInfoRes[b]);
        }
    }

    // reconstruct A from the factorization for implicit testing
    // vec[0] stays 1 throughout; the loops below only overwrite vec[i] for i >= 1.
    // NOTE(review): vec has max(m, n) elements, so vec[0] is out of range if both m and n are
    // 0 -- confirm callers never reach this point with empty sizes.
    std::vector vec(max(m, n));
    vec[0] = 1;
    for(int b = 0; b < bc; ++b)
    {
        T* a = hARes[b];
        T* tauq = hTauq[b];
        T* taup = hTaup[b];

        if(m >= n)
        {
            for(int j = n - 1; j >= 0; j--)
            {
                if(j < n - 1)
                {
                    // In the complex case taup[j] and row j (right of the diagonal) are passed
                    // through cblas_lacgv first; taup[j] is passed through it again after the
                    // larf call.
                    if(COMPLEX)
                    {
                        cblas_lacgv(1, taup + j, 1);
                        cblas_lacgv(n - j - 1, a + j + (j + 1) * lda, lda);
                    }
                    // Move the row entries a(j, j+2 ..) into vec and zero them in the matrix.
                    for(int i = 1; i < n - j - 1; i++)
                    {
                        vec[i] = a[j + (j + i + 1) * lda];
                        a[j + (j + i + 1) * lda] = 0;
                    }
                    cblas_larf(HIPSOLVER_SIDE_RIGHT, m - j, n - j - 1, vec.data(), 1, taup + j,
                               a + j + (j + 1) * lda, lda, hW.data());
                    if(COMPLEX)
                        cblas_lacgv(1, taup + j, 1);
                }

                // Move the column entries a(j+1 .., j) into vec and zero them in the matrix.
                for(int i = 1; i < m - j; i++)
                {
                    vec[i] = a[(j + i) + j * lda];
                    a[(j + i) + j * lda] = 0;
                }
                cblas_larf(HIPSOLVER_SIDE_LEFT, m - j, n - j, vec.data(), 1, tauq + j,
                           a + j + j * lda, lda, hW.data());
            }
        }
        else
        {
            for(int j = m - 1; j >= 0; j--)
            {
                if(j < m - 1)
                {
                    // Move the column entries a(j+2 .., j) into vec and zero them in the matrix.
                    for(int i = 1; i < m - j - 1; i++)
                    {
                        vec[i] = a[(j + i + 1) + j * lda];
                        a[(j + i + 1) + j * lda] = 0;
                    }
                    cblas_larf(HIPSOLVER_SIDE_LEFT, m - j - 1, n - j, vec.data(), 1, tauq + j,
                               a + (j + 1) + j * lda, lda, hW.data());
                }

                if(COMPLEX)
                {
                    cblas_lacgv(1, taup + j, 1);
                    cblas_lacgv(n - j, a + j + j * lda, lda);
                }
                // Move the row entries a(j, j+1 ..) into vec and zero them in the matrix.
                for(int i = 1; i < n - j; i++)
                {
                    vec[i] = a[j + (j + i) * lda];
                    a[j + (j + i) * lda] = 0;
                }
                cblas_larf(HIPSOLVER_SIDE_RIGHT, m - j, n - j, vec.data(), 1, taup + j,
                           a + j + j * lda, lda, hW.data());
                if(COMPLEX)
                    cblas_lacgv(1, taup + j, 1);
            }
        }
    }

    // error is ||hA - hARes|| / ||hA||
    // using frobenius norm
    double err;
    *max_err = 0;
    for(int b = 0; b < bc; ++b)
    {
        err = norm_error('F', m, n, lda, hA[b], hARes[b]);
        *max_err = err > *max_err ? err : *max_err;
    }

    // check info
    // every non-zero info value adds 1 to the reported error
    err = 0;
    for(int b = 0; b < bc; ++b)
        if(hInfoRes[b][0] != 0)
            err++;
    *max_err += err;
}

// Collects timing data.
//  - Unless `perf` is set, times cblas_gebrd over the whole batch on the host and stores the
//    elapsed value in *cpu_time_used.
//  - Runs two untimed ("cold") GPU calls, then `hot_calls` timed ones. The input is
//    re-initialized before every call, outside the timed region.
//  - The timed durations are added to *gpu_time_used and the sum is divided by hot_calls, so
//    the caller must pass *gpu_time_used already set to its starting value (testing_gebrd
//    passes 0).
// NOTE(review): the status returned by the timed hipsolver_gebrd calls is not checked, and
// the final division is unguarded for hot_calls == 0 -- confirm both are intended.
template void gebrd_getPerfData(const hipsolverHandle_t handle, const int m, const int n, Td& dA,
                                const int lda, const int stA, Sd& dD, const int stD, Sd& dE,
                                const int stE, Ud& dTauq, const int stQ, Ud& dTaup,
                                const int stP, Ud& dWork, const int lwork, Vd& dInfo,
                                const int bc, Th& hA, Sh& hD, Sh& hE, Uh& hTauq, Uh& hTaup,
                                Vh& hInfo, double* gpu_time_used, double* cpu_time_used,
                                const int hot_calls, const bool perf)
{
    std::vector hW(max(m, n));

    if(!perf)
    {
        gebrd_initData(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup, stP, bc,
                       hA, hD, hE, hTauq, hTaup);

        // cpu-lapack performance (only if not in perf mode)
        *cpu_time_used = get_time_us_no_sync();
        for(int b = 0; b < bc; ++b)
            cblas_gebrd(m, n, hA[b], lda, hD[b], hE[b], hTauq[b], hTaup[b], hW.data(), max(m, n),
                        hInfo[b]);
        *cpu_time_used = get_time_us_no_sync() - *cpu_time_used;
    }

    gebrd_initData(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup, stP, bc, hA,
                   hD, hE, hTauq, hTaup);

    // cold calls
    for(int iter = 0; iter < 2; iter++)
    {
        gebrd_initData(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup, stP, bc,
                       hA, hD, hE, hTauq, hTaup);

        CHECK_ROCBLAS_ERROR(hipsolver_gebrd(FORTRAN, handle, m, n, dA.data(), lda, stA, dD.data(),
                                            stD, dE.data(), stE, dTauq.data(), stQ, dTaup.data(),
                                            stP, dWork.data(), lwork, dInfo.data(), bc));
    }

    // gpu-lapack performance
    hipStream_t stream;
    CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream));
    double start;

    for(int iter = 0; iter < hot_calls; iter++)
    {
        gebrd_initData(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup, stP, bc,
                       hA, hD, hE, hTauq, hTaup);

        start = get_time_us_sync(stream);
        hipsolver_gebrd(FORTRAN, handle, m, n, dA.data(), lda, stA, dD.data(), stD, dE.data(),
                        stE, dTauq.data(), stQ, dTaup.data(), stP, dWork.data(), lwork,
                        dInfo.data(), bc);
        *gpu_time_used += get_time_us_sync(stream) - start;
    }
    *gpu_time_used /= hot_calls;
}

// Main gebrd test / benchmark driver.
// Reads the problem description from `argus`, then:
//  - for invalid sizes (m < 0, n < 0, lda < m or bc < 0) expects
//    HIPSOLVER_STATUS_INVALID_VALUE from a call with null buffers and returns;
//  - queries the workspace size, and returns after reporting it if argus.mem_query is set;
//  - allocates host and device buffers, runs gebrd_getError and/or gebrd_getPerfData
//    according to the unit_check / norm_check / timing flags;
//  - checks max_error against a tolerance scaled by m * n and prints the bench output.
// The BATCHED branches contain only commented-out code.
template void testing_gebrd(Arguments& argus)
{
    using S = decltype(std::real(T{}));

    // get arguments
    // strides and lda default to values derived from m and n when not supplied
    hipsolver_local_handle handle;
    int m = argus.get("m");
    int n = argus.get("n", m);
    int lda = argus.get("lda", m);
    int stA = argus.get("strideA", lda * n);
    int stD = argus.get("strideD", min(m, n));
    int stE = argus.get("strideE", min(m, n) - 1);
    int stQ = argus.get("strideQ", min(m, n));
    int stP = argus.get("strideP", min(m, n));

    int bc = argus.batch_count;
    int hot_calls = argus.iters;

    // the result copy of A is only needed when the output is verified
    int stARes = (argus.unit_check || argus.norm_check) ? stA : 0;

    // check non-supported values
    // N/A

    // determine sizes
    // NOTE(review): these are computed before the invalid-size check below. When min(m, n)
    // is 0, `min(m, n) - 1` is presumably -1 and becomes a very large value once stored in
    // size_t; `lda * n` is likewise evaluated before the conversion. Confirm that such sizes
    // never reach the allocations further down.
    size_t size_A = lda * n;
    size_t size_D = min(m, n);
    size_t size_E = min(m, n) - 1;
    size_t size_Q = min(m, n);
    size_t size_P = min(m, n);
    double max_error = 0, gpu_time_used = 0, cpu_time_used = 0;

    size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0;

    // check invalid sizes
    bool invalid_size = (m < 0 || n < 0 || lda < m || bc < 0);
    if(invalid_size)
    {
        if(BATCHED)
        {
            // Disabled batched path, kept as comments:
            // EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, handle, m, n, (T* const*)nullptr,
            //                                       lda, stA, (S*)nullptr, stD, (S*)nullptr, stE,
            //                                       (T*)nullptr, stQ, (T*)nullptr, stP,
            //                                       (T*)nullptr, 0, (int*)nullptr, bc),
            //                       HIPSOLVER_STATUS_INVALID_VALUE);
        }
        else
        {
            EXPECT_ROCBLAS_STATUS(hipsolver_gebrd(FORTRAN, handle, m, n, (T*)nullptr, lda, stA,
                                                  (S*)nullptr, stD, (S*)nullptr, stE, (T*)nullptr,
                                                  stQ, (T*)nullptr, stP, (T*)nullptr, 0,
                                                  (int*)nullptr, bc),
                                  HIPSOLVER_STATUS_INVALID_VALUE);
        }

        if(argus.timing)
            rocsolver_bench_inform(inform_invalid_size);

        return;
    }

    // memory size query is necessary
    int size_W;
    hipsolver_gebrd_bufferSize(FORTRAN, handle, m, n, (T*)nullptr, lda, &size_W);

    if(argus.mem_query)
    {
        rocsolver_bench_inform(inform_mem_query, size_W);
        return;
    }

    if(BATCHED)
    {
        // Disabled batched path, kept as comments:
        // // memory allocations
        // host_batch_vector hA(size_A, 1, bc);
        // host_batch_vector hARes(size_ARes, 1, bc);
        // host_strided_batch_vector hD(size_D, 1, stD, bc);
        // host_strided_batch_vector hE(size_E, 1, stE, bc);
        // host_strided_batch_vector hTaup(size_P, 1, stP, bc);
        // host_strided_batch_vector hTauq(size_Q, 1, stQ, bc);
        // host_strided_batch_vector hInfo(1, 1, 1, bc);
        // host_strided_batch_vector hInfoRes(1, 1, 1, bc);
        // device_batch_vector dA(size_A, 1, bc);
        // device_strided_batch_vector dD(size_D, 1, stD, bc);
        // device_strided_batch_vector dE(size_E, 1, stE, bc);
        // device_strided_batch_vector dTauq(size_Q, 1, stQ, bc);
        // device_strided_batch_vector dTaup(size_P, 1, stP, bc);
        // device_strided_batch_vector dInfo(1, 1, 1, bc);
        // device_strided_batch_vector dWork(size_W, 1, size_W, bc);
        // if(size_A)
        //     CHECK_HIP_ERROR(dA.memcheck());
        // if(size_D)
        //     CHECK_HIP_ERROR(dD.memcheck());
        // if(size_E)
        //     CHECK_HIP_ERROR(dE.memcheck());
        // if(size_Q)
        //     CHECK_HIP_ERROR(dTauq.memcheck());
        // if(size_P)
        //     CHECK_HIP_ERROR(dTaup.memcheck());
        // CHECK_HIP_ERROR(dInfo.memcheck());
        // if(size_W)
        //     CHECK_HIP_ERROR(dWork.memcheck());

        // // check computations
        // if(argus.unit_check || argus.norm_check)
        //     gebrd_getError(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup,
        //                    stP, dWork, size_W, dInfo, bc, hA, hARes, hD, hE, hTauq, hTaup,
        //                    hInfo, hInfoRes, &max_error);

        // // collect performance data
        // if(argus.timing)
        //     gebrd_getPerfData(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup,
        //                       stP, dWork, size_W, dInfo, bc, hA, hD, hE, hTauq, hTaup, hInfo,
        //                       &gpu_time_used, &cpu_time_used, hot_calls, argus.perf);
    }
    else
    {
        // memory allocations
        host_strided_batch_vector hA(size_A, 1, stA, bc);
        host_strided_batch_vector hARes(size_ARes, 1, stARes, bc);
        host_strided_batch_vector hD(size_D, 1, stD, bc);
        host_strided_batch_vector hE(size_E, 1, stE, bc);
        host_strided_batch_vector hTaup(size_P, 1, stP, bc);
        host_strided_batch_vector hTauq(size_Q, 1, stQ, bc);
        host_strided_batch_vector hInfo(1, 1, 1, bc);
        host_strided_batch_vector hInfoRes(1, 1, 1, bc);
        device_strided_batch_vector dA(size_A, 1, stA, bc);
        device_strided_batch_vector dD(size_D, 1, stD, bc);
        device_strided_batch_vector dE(size_E, 1, stE, bc);
        device_strided_batch_vector dTauq(size_Q, 1, stQ, bc);
        device_strided_batch_vector dTaup(size_P, 1, stP, bc);
        device_strided_batch_vector dInfo(1, 1, 1, bc);
        device_strided_batch_vector dWork(size_W, 1, size_W, bc);
        // buffers whose requested size is zero are not checked
        if(size_A)
            CHECK_HIP_ERROR(dA.memcheck());
        if(size_D)
            CHECK_HIP_ERROR(dD.memcheck());
        if(size_E)
            CHECK_HIP_ERROR(dE.memcheck());
        if(size_Q)
            CHECK_HIP_ERROR(dTauq.memcheck());
        if(size_P)
            CHECK_HIP_ERROR(dTaup.memcheck());
        CHECK_HIP_ERROR(dInfo.memcheck());
        if(size_W)
            CHECK_HIP_ERROR(dWork.memcheck());

        // check computations
        if(argus.unit_check || argus.norm_check)
            gebrd_getError(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup, stP,
                           dWork, size_W, dInfo, bc, hA, hARes, hD, hE, hTauq, hTaup, hInfo,
                           hInfoRes, &max_error);

        // collect performance data
        if(argus.timing)
            gebrd_getPerfData(handle, m, n, dA, lda, stA, dD, stD, dE, stE, dTauq, stQ, dTaup,
                              stP, dWork, size_W, dInfo, bc, hA, hD, hE, hTauq, hTaup, hInfo,
                              &gpu_time_used, &cpu_time_used, hot_calls, argus.perf);
    }

    // validate results for rocsolver-test
    // using m*n * machine_precision as tolerance
    if(argus.unit_check)
        ROCSOLVER_TEST_CHECK(T, max_error, m * n);

    // output results for rocsolver-bench
    if(argus.timing)
    {
        if(!argus.perf)
        {
            std::cerr << "\n============================================\n";
            std::cerr << "Arguments:\n";
            std::cerr << "============================================\n";
            if(BATCHED)
            {
                rocsolver_bench_output("m", "n", "lda", "strideP", "batch_c");
                rocsolver_bench_output(m, n, lda, stP, bc);
            }
            else if(STRIDED)
            {
                rocsolver_bench_output("m", "n", "lda", "strideA", "strideP", "batch_c");
                rocsolver_bench_output(m, n, lda, stA, stP, bc);
            }
            else
            {
                rocsolver_bench_output("m", "n", "lda");
                rocsolver_bench_output(m, n, lda);
            }
            std::cerr << "\n============================================\n";
            std::cerr << "Results:\n";
            std::cerr << "============================================\n";
            if(argus.norm_check)
            {
                rocsolver_bench_output("cpu_time", "gpu_time", "error");
                rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error);
            }
            else
            {
                rocsolver_bench_output("cpu_time", "gpu_time");
                rocsolver_bench_output(cpu_time_used, gpu_time_used);
            }
            std::cerr << std::endl;
        }
        else
        {
            // perf mode prints the values only, without headers or cpu time
            if(argus.norm_check)
                rocsolver_bench_output(gpu_time_used, max_error);
            else
                rocsolver_bench_output(gpu_time_used);
        }
    }

    // ensure all arguments were consumed
    argus.validate_consumed();
}
hipSOLVER-rocm-5.5.1/clients/include/testing_gels.hpp000066400000000000000000001105341436107207300224660ustar00rootroot00000000000000/* ************************************************************************
 * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void gels_checkBadArgs(const hipsolverHandle_t handle, const int m, const int n, const int nrhs, U dA, const int lda, const int stA, U dB, const int ldb, const int stB, U dX, const int ldx, const int stX, U dWork, const size_t lwork, int* niters, int* info, const int bc) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // handle EXPECT_ROCBLAS_STATUS(hipsolver_gels(API, false, nullptr, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dX, ldx, stX, dWork, lwork, niters, info, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values // N/A // pointers EXPECT_ROCBLAS_STATUS(hipsolver_gels(API, false, handle, m, n, nrhs, (U) nullptr, lda, stA, dB, ldb, stB, dX, ldx, stX, dWork, lwork, niters, info, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gels(API, false, handle, m, n, nrhs, dA, lda, stA, (U) nullptr, ldb, stB, dX, ldx, stX, dWork, lwork, niters, info, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gels(API, false, handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, (U) nullptr, ldx, stX, dWork, lwork, niters, info, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gels(API, false, handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dX, ldx, stX, dWork, lwork, niters, nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_gels_bad_arg() { // safe arguments hipsolver_local_handle handle; int m = 1; int n = 1; int nrhs = 1; int lda = 1; int ldb = 1; int ldx = 1; int stA = 1; int stB = 1; int stX = 1; int bc = 1; if(BATCHED) { // // memory allocations // host_strided_batch_vector hNIters(1, 1, 1, bc); // device_batch_vector dA(1, 1, 1); // device_batch_vector dB(1, 1, 1); // device_batch_vector dX(1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dB.memcheck()); // 
CHECK_HIP_ERROR(dX.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // size_t size_W; // hipsolver_gels_bufferSize( // API, handle, m, n, nrhs, dA.data(), lda, dB.data(), ldb, dX.data(), ldx, &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // gels_checkBadArgs(handle, // m, // n, // nrhs, // dA.data(), // lda, // stA, // dB.data(), // ldb, // stB, // dX.data(), // ldx, // stX, // dWork.data(), // size_W, // hNIters.data(), // dInfo.data(), // bc); } else { // memory allocations host_strided_batch_vector hNIters(1, 1, 1, bc); device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dB(1, 1, 1, 1); device_strided_batch_vector dX(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dX.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); size_t size_W; hipsolver_gels_bufferSize( API, handle, m, n, nrhs, dA.data(), lda, dB.data(), ldb, dX.data(), ldx, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments gels_checkBadArgs(handle, m, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dX.data(), ldx, stX, dWork.data(), size_W, hNIters.data(), dInfo.data(), bc); } } template void gels_initData(const hipsolverHandle_t handle, const int m, const int n, const int nrhs, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, Ud& dInfo, const int bc, Th& hA, Th& hB, Th& hX, Uh& hInfo) { if(CPU) { rocblas_init(hA, true); rocblas_init(hB, true); const int max_index = std::max(0, std::min(m, n) - 1); std::uniform_int_distribution sample_index(0, max_index); std::bernoulli_distribution coinflip(0.5); const int ldx = max(m, n); for(int b = 0; b < bc; ++b) { // scale A to avoid singularities for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; 
else hA[b][i + j * lda] -= 4; } } // populate hX with values from hB for(int i = 0; i < m; i++) for(int j = 0; j < nrhs; j++) hX[b][i + j * ldx] = hB[b][i + j * ldb]; } } if(GPU) { // now copy the matrices A and B to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); } } template void gels_getError(const hipsolverHandle_t handle, const int m, const int n, const int nrhs, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, Td& dX, const int ldx, const int stX, Td& dWork, const size_t lwork, Ud& dInfo, const int bc, Th& hA, Th& hB, Th& hBRes, Th& hX, Th& hXRes, Uh& hNIters, Uh& hInfo, Uh& hInfoRes, double* max_err) { int sizeW = max(1, min(m, n) + max(min(m, n), nrhs)); std::vector hW(sizeW); // input data initialization gels_initData( handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dInfo, bc, hA, hB, hX, hInfo); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_gels(API, INPLACE, handle, m, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dX.data(), ldx, stX, dWork.data(), lwork, hNIters.data(), dInfo.data(), bc)); CHECK_HIP_ERROR(hBRes.transfer_from(dB)); CHECK_HIP_ERROR(hXRes.transfer_from(dX)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) { cblas_gels( HIPSOLVER_OP_N, m, n, nrhs, hA[b], lda, hX[b], max(m, n), hW.data(), sizeW, hInfo[b]); } // error is ||hX - hXRes|| / ||hX|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. // IT MIGHT BE REVISITED IN THE FUTURE) // using vector-induced infinity norm double err; *max_err = 0; for(int b = 0; b < bc; ++b) { if(!INPLACE) { err = norm_error('F', m, nrhs, ldb, hB[b], hBRes[b]); *max_err = err > *max_err ? err : *max_err; if(hInfo[b][0] == 0) { err = norm_error('I', n, nrhs, max(m, n), hX[b], hXRes[b], ldx); *max_err = err > *max_err ? 
err : *max_err; } } else { if(hInfo[b][0] == 0) { err = norm_error('I', n, nrhs, max(m, n), hX[b], hBRes[b], ldb); *max_err = err > *max_err ? err : *max_err; } } } // also check info for singularities err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) err++; *max_err += err; } template void gels_getPerfData(const hipsolverHandle_t handle, const int m, const int n, const int nrhs, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, Td& dX, const int ldx, const int stX, Td& dWork, const size_t lwork, Ud& dInfo, const int bc, Th& hA, Th& hB, Th& hX, Uh& hNIters, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { int sizeW = max(1, min(m, n) + max(min(m, n), nrhs)); std::vector hW(sizeW); if(!perf) { gels_initData( handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dInfo, bc, hA, hB, hX, hInfo); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) { cblas_gels(HIPSOLVER_OP_N, m, n, nrhs, hA[b], lda, hX[b], max(m, n), hW.data(), sizeW, hInfo[b]); } *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } gels_initData( handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dInfo, bc, hA, hB, hX, hInfo); // cold calls for(int iter = 0; iter < 2; iter++) { gels_initData( handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dInfo, bc, hA, hB, hX, hInfo); CHECK_ROCBLAS_ERROR(hipsolver_gels(API, INPLACE, handle, m, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dX.data(), ldx, stX, dWork.data(), lwork, hNIters.data(), dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { gels_initData( handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dInfo, bc, hA, hB, hX, hInfo); start = get_time_us_sync(stream); hipsolver_gels(API, INPLACE, handle, m, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dX.data(), 
ldx, stX, dWork.data(), lwork, hNIters.data(), dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template > void testing_gels(Arguments& argus) { // get arguments hipsolver_local_handle handle; int m = argus.get("m"); int n = argus.get("n", m); int nrhs = argus.get("nrhs", n); int lda = argus.get("lda", m); int ldb = argus.get("ldb", m); int ldx = argus.get("ldx", n); int stA = argus.get("strideA", lda * n); int stB = argus.get("strideB", ldb * nrhs); int stX = argus.get("strideX", ldx * nrhs); int bc = argus.batch_count; int hot_calls = argus.iters; int stBRes = (argus.unit_check || argus.norm_check) ? stB : 0; int stXRes = (argus.unit_check || argus.norm_check) ? stX : 0; // check non-supported values // N/A // determine sizes size_t size_A = size_t(lda) * n; size_t size_B = size_t(ldb) * nrhs; size_t size_X = size_t(ldx) * nrhs; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_BRes = (argus.unit_check || argus.norm_check) ? size_B : 0; size_t size_XRes = (argus.unit_check || argus.norm_check) ? 
size_X : 0; // check invalid sizes bool invalid_size = (m < 0 || n < 0 || nrhs < 0 || lda < m || ldb < m || ldx < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_gels(API, // INPLACE, // handle, // m, // n, // nrhs, // (T* const*)nullptr, // lda, // stA, // (T* const*)nullptr, // ldb, // stB, // (T* const*)nullptr, // ldx, // stX, // (T*)nullptr, // 0, // (int*)nullptr, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_gels(API, INPLACE, handle, m, n, nrhs, (T*)nullptr, lda, stA, (T*)nullptr, ldb, stB, (T*)nullptr, ldx, stX, (T*)nullptr, 0, (int*)nullptr, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary size_t size_W; hipsolver_gels_bufferSize( API, handle, m, n, nrhs, (T*)nullptr, lda, (T*)nullptr, ldb, (T*)nullptr, ldx, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hB(size_B, 1, bc); // host_batch_vector hBRes(size_BRes, 1, bc); // host_batch_vector hX(max(m, n) * nrhs, 1, bc); // host_batch_vector hXRes(size_XRes, 1, bc); // host_strided_batch_vector hNIters(1, 1, 1, bc); // host_strided_batch_vector hInfo(1, 1, 1, bc); // host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_batch_vector dB(size_B, 1, bc); // device_batch_vector dX(size_X, 1, bc); // device_strided_batch_vector dInfo(1, 1, 1, bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // if(size_B) // CHECK_HIP_ERROR(dB.memcheck()); // if(size_X) // CHECK_HIP_ERROR(dX.memcheck()); // if(bc) // CHECK_HIP_ERROR(dInfo.memcheck()); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // 
gels_getError(handle, // m, // n, // nrhs, // dA, // lda, // stA, // dB, // ldb, // stB, // dX, // ldx, // stX, // dWork, // size_W, // dInfo, // bc, // hA, // hB, // hBRes, // hX, // hXRes, // hNIters, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // gels_getPerfData(handle, // m, // n, // nrhs, // dA, // lda, // stA, // dB, // ldb, // stB, // dX, // ldx, // stX, // dWork, // size_W, // dInfo, // bc, // hA, // hB, // hX, // hNIters, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hB(size_B, 1, stB, bc); host_strided_batch_vector hBRes(size_BRes, 1, stBRes, bc); host_strided_batch_vector hX(max(m, n) * nrhs, 1, max(m, n) * nrhs, bc); host_strided_batch_vector hXRes(size_XRes, 1, stXRes, bc); host_strided_batch_vector hNIters(1, 1, 1, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dB(size_B, 1, stB, bc); device_strided_batch_vector dX(size_X, 1, stX, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_B) CHECK_HIP_ERROR(dB.memcheck()); if(size_X) CHECK_HIP_ERROR(dX.memcheck()); if(bc) CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) gels_getError(handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dX, ldx, stX, dWork, size_W, dInfo, bc, hA, hB, hBRes, hX, hXRes, hNIters, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) gels_getPerfData(handle, m, n, nrhs, dA, lda, stA, dB, ldb, stB, dX, ldx, stX, dWork, size_W, dInfo, bc, hA, hB, hX, hNIters, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for 
rocsolver-test // using max(m,n) * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, max(m, n)); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("m", "n", "nrhs", "lda", "ldb", "ldx", "batch_c"); rocsolver_bench_output(m, n, nrhs, lda, ldb, ldx, bc); } else if(STRIDED) { rocsolver_bench_output("m", "n", "nrhs", "lda", "ldb", "ldx", "strideA", "strideB", "strideX", "batch_c"); rocsolver_bench_output(m, n, nrhs, lda, ldb, ldx, stA, stB, stX, bc); } else { rocsolver_bench_output("m", "n", "nrhs", "lda", "ldb", "ldx"); rocsolver_bench_output(m, n, nrhs, lda, ldb, ldx); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_geqrf.hpp000066400000000000000000000565161436107207300226510ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void geqrf_checkBadArgs(const hipsolverHandle_t handle, const int m, const int n, T dA, const int lda, const int stA, U dIpiv, const int stP, U dWork, const int lwork, V dInfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_geqrf(FORTRAN, nullptr, m, n, dA, lda, stA, dIpiv, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values // N/A #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS( hipsolver_geqrf( FORTRAN, handle, m, n, (T) nullptr, lda, stA, dIpiv, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_geqrf( FORTRAN, handle, m, n, dA, lda, stA, (U) nullptr, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_geqrf( FORTRAN, handle, m, n, dA, lda, stA, dIpiv, stP, dWork, lwork, (V) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_geqrf_bad_arg() { // safe arguments hipsolver_local_handle handle; int m = 1; int n = 1; int lda = 1; int stA = 1; int stP = 1; int bc = 1; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dIpiv(1, 1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dIpiv.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // int size_W; // hipsolver_geqrf_bufferSize(FORTRAN, handle, m, n, dA.data(), lda, &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // geqrf_checkBadArgs(handle, // m, // n, // dA.data(), // lda, // stA, // dIpiv.data(), // stP, // dWork.data(), // size_W, // dInfo.data(), // bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 
1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_geqrf_bufferSize(FORTRAN, handle, m, n, dA.data(), lda, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments geqrf_checkBadArgs(handle, m, n, dA.data(), lda, stA, dIpiv.data(), stP, dWork.data(), size_W, dInfo.data(), bc); } } template void geqrf_initData(const hipsolverHandle_t handle, const int m, const int n, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, const int bc, Th& hA, Uh& hIpiv) { if(CPU) { rocblas_init(hA, true); // scale A to avoid singularities for(int b = 0; b < bc; ++b) { for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } } if(GPU) { // now copy to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void geqrf_getError(const hipsolverHandle_t handle, const int m, const int n, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Ud& dWork, const int lwork, Vd& dInfo, const int bc, Th& hA, Th& hARes, Uh& hIpiv, Vh& hInfo, Vh& hInfoRes, double* max_err) { std::vector hW(n); // input data initialization geqrf_initData(handle, m, n, dA, lda, stA, dIpiv, stP, bc, hA, hIpiv); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_geqrf(FORTRAN, handle, m, n, dA.data(), lda, stA, dIpiv.data(), stP, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) cblas_geqrf(m, n, hA[b], lda, hIpiv[b], hW.data(), n, hInfo[b]); // error is ||hA - hARes|| / ||hA|| (ideally ||QR - Qres Rres|| / ||QR||) // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. 
// IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm double err; *max_err = 0; for(int b = 0; b < bc; ++b) { err = norm_error('F', m, n, lda, hA[b], hARes[b]); *max_err = err > *max_err ? err : *max_err; } // check info err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) err++; *max_err += err; } template void geqrf_getPerfData(const hipsolverHandle_t handle, const int m, const int n, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Ud& dWork, const int lwork, Vd& dInfo, const int bc, Th& hA, Uh& hIpiv, Vh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { std::vector hW(n); if(!perf) { geqrf_initData(handle, m, n, dA, lda, stA, dIpiv, stP, bc, hA, hIpiv); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_geqrf(m, n, hA[b], lda, hIpiv[b], hW.data(), n, hInfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } geqrf_initData(handle, m, n, dA, lda, stA, dIpiv, stP, bc, hA, hIpiv); // cold calls for(int iter = 0; iter < 2; iter++) { geqrf_initData(handle, m, n, dA, lda, stA, dIpiv, stP, bc, hA, hIpiv); CHECK_ROCBLAS_ERROR(hipsolver_geqrf(FORTRAN, handle, m, n, dA.data(), lda, stA, dIpiv.data(), stP, dWork.data(), lwork, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { geqrf_initData(handle, m, n, dA, lda, stA, dIpiv, stP, bc, hA, hIpiv); start = get_time_us_sync(stream); hipsolver_geqrf(FORTRAN, handle, m, n, dA.data(), lda, stA, dIpiv.data(), stP, dWork.data(), lwork, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_geqrf(Arguments& argus) { // get arguments hipsolver_local_handle handle; int m = argus.get("m"); int n = argus.get("n", m); int lda = argus.get("lda", m); int stA = 
argus.get("strideA", lda * n); int stP = argus.get("strideP", min(m, n)); int bc = argus.batch_count; int hot_calls = argus.iters; int stARes = (argus.unit_check || argus.norm_check) ? stA : 0; // check non-supported values // N/A // determine sizes size_t size_A = size_t(lda) * n; size_t size_P = size_t(min(m, n)); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; // check invalid sizes bool invalid_size = (m < 0 || n < 0 || lda < m || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_geqrf(FORTRAN, // handle, // m, // n, // (T* const*)nullptr, // lda, // stA, // (T*)nullptr, // stP, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_geqrf(FORTRAN, handle, m, n, (T*)nullptr, lda, stA, (T*)nullptr, stP, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_geqrf_bufferSize(FORTRAN, handle, m, n, (T*)nullptr, lda, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hARes(size_ARes, 1, bc); // host_strided_batch_vector hIpiv(size_P, 1, stP, bc); // host_strided_batch_vector hInfo(1, 1, 1, bc); // host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_strided_batch_vector dIpiv(size_P, 1, stP, bc); // device_strided_batch_vector dInfo(1, 1, 1, bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // if(size_P) // CHECK_HIP_ERROR(dIpiv.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) 
// geqrf_getError(handle, // m, // n, // dA, // lda, // stA, // dIpiv, // stP, // dWork, // size_W, // dInfo, // bc, // hA, // hARes, // hIpiv, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // geqrf_getPerfData(handle, // m, // n, // dA, // lda, // stA, // dIpiv, // stP, // dWork, // size_W, // dInfo, // bc, // hA, // hIpiv, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); host_strided_batch_vector hIpiv(size_P, 1, stP, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dIpiv(size_P, 1, stP, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) geqrf_getError(handle, m, n, dA, lda, stA, dIpiv, stP, dWork, size_W, dInfo, bc, hA, hARes, hIpiv, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) geqrf_getPerfData(handle, m, n, dA, lda, stA, dIpiv, stP, dWork, size_W, dInfo, bc, hA, hIpiv, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using m * machine_precision as tolerance // (for possibly singular of ill-conditioned matrices we could use m*min(m,n)) if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, m); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { 
rocsolver_bench_output("m", "n", "lda", "strideP", "batch_c"); rocsolver_bench_output(m, n, lda, stP, bc); } else if(STRIDED) { rocsolver_bench_output("m", "n", "lda", "strideA", "strideP", "batch_c"); rocsolver_bench_output(m, n, lda, stA, stP, bc); } else { rocsolver_bench_output("m", "n", "lda"); rocsolver_bench_output(m, n, lda); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_gesv.hpp000066400000000000000000001141111436107207300224730ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void gesv_checkBadArgs(const hipsolverHandle_t handle, const int n, const int nrhs, T dA, const int lda, const int stA, U dIpiv, const int stP, T dB, const int ldb, const int stB, T dX, const int ldx, const int stX, T dWork, const size_t lwork, U niters, U dInfo, const int bc) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // handle EXPECT_ROCBLAS_STATUS(hipsolver_gesv(API, false, nullptr, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, dX, ldx, stX, dWork, lwork, niters, dInfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values // N/A // pointers EXPECT_ROCBLAS_STATUS(hipsolver_gesv(API, false, handle, n, nrhs, (T) nullptr, lda, stA, dIpiv, stP, dB, ldb, stB, dX, ldx, stX, dWork, lwork, niters, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesv(API, false, handle, n, nrhs, dA, lda, stA, (U) nullptr, stP, dB, ldb, stB, dX, ldx, stX, dWork, lwork, niters, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesv(API, false, handle, n, nrhs, dA, lda, stA, dIpiv, stP, (T) nullptr, ldb, stB, dX, ldx, stX, dWork, lwork, niters, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesv(API, false, handle, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, (T) nullptr, ldx, stX, dWork, lwork, niters, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); 
EXPECT_ROCBLAS_STATUS(hipsolver_gesv(API, false, handle, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, dX, ldx, stX, dWork, lwork, niters, (U) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_gesv_bad_arg() { // safe arguments hipsolver_local_handle handle; int n = 1; int nrhs = 1; int lda = 1; int ldb = 1; int ldx = 1; int stA = 1; int stP = 1; int stB = 1; int stX = 1; int bc = 1; if(BATCHED) { // // memory allocations // host_strided_batch_vector hNiters(1, 1, 1, 1); // device_batch_vector dA(1, 1, 1); // device_batch_vector dB(1, 1, 1); // device_batch_vector dX(1, 1, 1); // device_strided_batch_vector dIpiv(1, 1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dB.memcheck()); // CHECK_HIP_ERROR(dX.memcheck()); // CHECK_HIP_ERROR(dIpiv.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // size_t size_W; // hipsolver_gesv_bufferSize(API, // handle, // n, // nrhs, // dA.data(), // lda, // dIpiv.data(), // dB.data(), // ldb, // dX.data(), // ldx, // &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // gesv_checkBadArgs(handle, // n, // nrhs, // dA.data(), // lda, // stA, // dIpiv.data(), // stP, // dB.data(), // ldb, // stB, // dX.data(), // ldx, // stX, // dWork.data(), // size_W, // hNiters.data(), // dInfo.data(), // bc); } else { // memory allocations host_strided_batch_vector hNiters(1, 1, 1, 1); device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dB(1, 1, 1, 1); device_strided_batch_vector dX(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dX.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); size_t size_W; hipsolver_gesv_bufferSize(API, handle, n, nrhs, dA.data(), lda, 
dIpiv.data(), dB.data(), ldb, dX.data(), ldx, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments gesv_checkBadArgs(handle, n, nrhs, dA.data(), lda, stA, dIpiv.data(), stP, dB.data(), ldb, stB, dX.data(), ldx, stX, dWork.data(), size_W, hNiters.data(), dInfo.data(), bc); } } template void gesv_initData(const hipsolverHandle_t handle, const int n, const int nrhs, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Td& dB, const int ldb, const int stB, const int bc, Th& hA, Uh& hIpiv, Th& hB) { if(CPU) { rocblas_init(hA, true); rocblas_init(hB, true); // scale A to avoid singularities for(int b = 0; b < bc; ++b) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } } if(GPU) { // now copy pivoting indices and matrices to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dIpiv.transfer_from(hIpiv)); } } template void gesv_getError(const hipsolverHandle_t handle, const int n, const int nrhs, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Td& dB, const int ldb, const int stB, Td& dX, const int ldx, const int stX, Td& dWork, const size_t lwork, Ud& dInfo, const int bc, Th& hA, Uh& hIpiv, Th& hB, Th& hBRes, Uh& hNiters, Uh& hInfo, Uh& hInfoRes, double* max_err) { // input data initialization gesv_initData( handle, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_gesv(API, INPLACE, handle, n, nrhs, dA.data(), lda, stA, dIpiv.data(), stP, dB.data(), ldb, stB, dX.data(), ldx, stX, dWork.data(), lwork, hNiters.data(), dInfo.data(), bc)); if(!INPLACE) CHECK_HIP_ERROR(hBRes.transfer_from(dX)); else CHECK_HIP_ERROR(hBRes.transfer_from(dB)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) { cblas_gesv(n, nrhs, 
hA[b], lda, hIpiv[b], hB[b], ldb, hInfo[b]); } // error is ||hB - hBRes|| / ||hB|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. // IT MIGHT BE REVISITED IN THE FUTURE) // using vector-induced infinity norm double err; *max_err = 0; for(int b = 0; b < bc; ++b) { if(hInfoRes[b][0] == 0) { err = norm_error('I', n, nrhs, ldb, hB[b], hBRes[b], (!INPLACE ? ldx : ldb)); *max_err = err > *max_err ? err : *max_err; } } // also check info for singularities err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) err++; *max_err += err; } template void gesv_getPerfData(const hipsolverHandle_t handle, const int n, const int nrhs, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Td& dB, const int ldb, const int stB, Td& dX, const int ldx, const int stX, Td& dWork, const size_t lwork, Ud& dInfo, const int bc, Th& hA, Uh& hIpiv, Th& hB, Uh& hNiters, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { if(!perf) { gesv_initData( handle, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) { cblas_gesv(n, nrhs, hA[b], lda, hIpiv[b], hB[b], ldb, hInfo[b]); } *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } gesv_initData( handle, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); // cold calls for(int iter = 0; iter < 2; iter++) { gesv_initData( handle, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); CHECK_ROCBLAS_ERROR(hipsolver_gesv(API, INPLACE, handle, n, nrhs, dA.data(), lda, stA, dIpiv.data(), stP, dB.data(), ldb, stB, dX.data(), ldx, stX, dWork.data(), lwork, hNiters.data(), dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { gesv_initData( handle, n, nrhs, dA, lda, stA, 
dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); start = get_time_us_sync(stream); hipsolver_gesv(API, INPLACE, handle, n, nrhs, dA.data(), lda, stA, dIpiv.data(), stP, dB.data(), ldb, stB, dX.data(), ldx, stX, dWork.data(), lwork, hNiters.data(), dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_gesv(Arguments& argus) { // get arguments hipsolver_local_handle handle; int n = argus.get("n"); int nrhs = argus.get("nrhs", n); int lda = argus.get("lda", n); int ldb = argus.get("ldb", n); int ldx = argus.get("ldx", n); int stA = argus.get("strideA", lda * n); int stP = argus.get("strideP", n); int stB = argus.get("strideB", ldb * nrhs); int stX = argus.get("strideX", ldx * nrhs); int bc = argus.batch_count; int hot_calls = argus.iters; int stBRes = (argus.unit_check || argus.norm_check) ? stX : 0; // check non-supported values // N/A // determine sizes size_t size_A = size_t(lda) * n; size_t size_B = size_t(ldb) * nrhs; size_t size_X = size_t(ldx) * nrhs; size_t size_P = size_t(n); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_BRes = (argus.unit_check || argus.norm_check) ? (!INPLACE ? 
size_X : size_B) : 0; // check invalid sizes bool invalid_size = (n < 0 || nrhs < 0 || lda < n || ldb < n || ldx < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_gesv(API, // INPLACE, // handle, // n, // nrhs, // (T* const*)nullptr, // lda, // stA, // (int*)nullptr, // stP, // (T* const*)nullptr, // ldb, // stB, // (T* const*)nullptr, // ldx, // stX, // (T*)nullptr, // 0, // (int*)nullptr, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_gesv(API, INPLACE, handle, n, nrhs, (T*)nullptr, lda, stA, (int*)nullptr, stP, (T*)nullptr, ldb, stB, (T*)nullptr, ldx, stX, (T*)nullptr, 0, (int*)nullptr, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary size_t size_W; hipsolver_gesv_bufferSize(API, handle, n, nrhs, (T*)nullptr, lda, (int*)nullptr, (T*)nullptr, ldb, (T*)nullptr, ldx, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hB(size_B, 1, bc); // host_batch_vector hBRes(size_BRes, 1, bc); // host_strided_batch_vector hIpiv(size_P, 1, stP, bc); // host_strided_batch_vector hNiters(1, 1, 1, bc); // host_strided_batch_vector hInfo(1, 1, 1, bc); // host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_batch_vector dB(size_B, 1, bc); // device_batch_vector dX(size_X, 1, bc); // device_strided_batch_vector dIpiv(size_P, 1, stP, bc); // device_strided_batch_vector dInfo(1, 1, 1, bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // if(size_B) // CHECK_HIP_ERROR(dB.memcheck()); // if(size_X) // CHECK_HIP_ERROR(dX.memcheck()); // if(size_P) // CHECK_HIP_ERROR(dIpiv.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // if(size_W) // 
CHECK_HIP_ERROR(dWork.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // gesv_getError(handle, // n, // nrhs, // dA, // lda, // stA, // dIpiv, // stP, // dB, // ldb, // stB, // dX, // ldx, // stX, // dWork, // size_W, // dInfo, // bc, // hA, // hIpiv, // hB, // hBRes, // hNiters, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // gesv_getPerfData(handle, // n, // nrhs, // dA, // lda, // stA, // dIpiv, // stP, // dB, // ldb, // stB, // dX, // ldx, // stX, // dWork, // size_W, // dInfo, // bc, // hA, // hIpiv, // hB, // hNiters, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hB(size_B, 1, stB, bc); host_strided_batch_vector hBRes(size_BRes, 1, stBRes, bc); host_strided_batch_vector hIpiv(size_P, 1, stP, bc); host_strided_batch_vector hNiters(1, 1, 1, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dB(size_B, 1, stB, bc); device_strided_batch_vector dX(size_X, 1, stX, bc); device_strided_batch_vector dIpiv(size_P, 1, stP, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_B) CHECK_HIP_ERROR(dB.memcheck()); if(size_X) CHECK_HIP_ERROR(dX.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) gesv_getError(handle, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, dX, ldx, stX, dWork, size_W, dInfo, bc, hA, hIpiv, hB, hBRes, hNiters, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) gesv_getPerfData(handle, n, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, 
dX, ldx, stX, dWork, size_W, dInfo, bc, hA, hIpiv, hB, hNiters, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("n", "nrhs", "lda", "ldb", "ldx", "strideP", "batch_c"); rocsolver_bench_output(n, nrhs, lda, ldb, ldx, stP, bc); } else if(STRIDED) { rocsolver_bench_output("n", "nrhs", "lda", "ldb", "ldx", "strideA", "strideP", "strideB", "strideX", "batch_c"); rocsolver_bench_output(n, nrhs, lda, ldb, ldx, stA, stP, stB, stX, bc); } else { rocsolver_bench_output("n", "nrhs", "lda", "ldb", "ldx"); rocsolver_bench_output(n, nrhs, lda, ldb, ldx); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_gesvd.hpp000066400000000000000000001612151436107207300226460ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void gesvd_checkBadArgs(const hipsolverHandle_t handle, const char left_svect, const char right_svect, const int m, const int n, W dA, const int lda, const int stA, TT dS, const int stS, T dU, const int ldu, const int stU, T dV, const int ldv, const int stV, T dWork, const int lwork, TT dE, const int stE, U dinfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, nullptr, left_svect, right_svect, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dE, stE, dinfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, handle, '\0', right_svect, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dE, stE, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, handle, left_svect, '\0', m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dE, stE, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, handle, 'O', 'O', m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dE, stE, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, handle, left_svect, right_svect, m, n, (W) nullptr, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dE, stE, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, handle, left_svect, right_svect, m, n, dA, lda, stA, (TT) nullptr, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dE, stE, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, handle, left_svect, right_svect, m, n, dA, lda, stA, dS, stS, (T) nullptr, ldu, stU, dV, ldv, stV, dWork, lwork, dE, stE, dinfo, bc), 
HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, handle, left_svect, right_svect, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, (T) nullptr, ldv, stV, dWork, lwork, dE, stE, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, false, handle, left_svect, right_svect, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dE, stE, (U) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_gesvd_bad_arg() { using S = decltype(std::real(T{})); // safe arguments hipsolver_local_handle handle; char left_svect = 'A'; char right_svect = 'A'; int m = 2; int n = 2; int lda = 2; int ldu = 2; int ldv = 2; int stA = 2; int stS = 2; int stU = 2; int stV = 2; int stE = 2; int bc = 1; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dS(1, 1, 1, 1); // device_strided_batch_vector dU(1, 1, 1, 1); // device_strided_batch_vector dV(1, 1, 1, 1); // device_strided_batch_vector dE(1, 1, 1, 1); // device_strided_batch_vector dinfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dS.memcheck()); // CHECK_HIP_ERROR(dU.memcheck()); // CHECK_HIP_ERROR(dV.memcheck()); // CHECK_HIP_ERROR(dE.memcheck()); // CHECK_HIP_ERROR(dinfo.memcheck()); // int size_W; // hipsolver_gesvd_bufferSize(API, handle, left_svect, right_svect, m, n, dA.data(), lda, &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // gesvd_checkBadArgs(handle, left_svect, right_svect, m, n, dA.data(), lda, stA, // dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, // dWork.data(), size_W, dE.data(), stE, dinfo.data(), bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dS(1, 1, 1, 1); device_strided_batch_vector dU(1, 1, 1, 1); device_strided_batch_vector dV(1, 1, 1, 1); 
device_strided_batch_vector dE(1, 1, 1, 1); device_strided_batch_vector dinfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dS.memcheck()); CHECK_HIP_ERROR(dU.memcheck()); CHECK_HIP_ERROR(dV.memcheck()); CHECK_HIP_ERROR(dE.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); int size_W; hipsolver_gesvd_bufferSize( API, handle, left_svect, right_svect, m, n, dA.data(), lda, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments gesvd_checkBadArgs(handle, left_svect, right_svect, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), size_W, dE.data(), stE, dinfo.data(), bc); } } template void gesvd_initData(const hipsolverHandle_t handle, const char left_svect, const char right_svect, const int m, const int n, Td& dA, const int lda, const int bc, Th& hA, std::vector& A, bool test = true) { if(CPU) { rocblas_init(hA, true); for(int b = 0; b < bc; ++b) { // scale A to avoid singularities for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } // make copy of original data to test vectors if required if(test && (left_svect != 'N' || right_svect != 'N')) { for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) A[b * lda * n + i + j * lda] = hA[b][i + j * lda]; } } } } if(GPU) { // now copy to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void gesvd_getError(const hipsolverHandle_t handle, const char left_svect, const char right_svect, const int m, const int n, Wd& dA, const int lda, const int stA, Td& dS, const int stS, Ud& dU, const int ldu, const int stU, Ud& dV, const int ldv, const int stV, Ud& dWork, const int lwork, Td& dE, const int stE, Id& dinfo, const int bc, const char left_svectT, const char right_svectT, const int mT, const int nT, Ud& dUT, const int lduT, const int stUT, Ud& dVT, const int ldvT, const int stVT, Wh& hA, Th& hS, Th& 
hSres, Uh& hU, Uh& Ures, const int ldures, Uh& hV, Uh& Vres, const int ldvres, Th& hE, Th& hEres, Ih& hinfo, Ih& hinfoRes, double* max_err, double* max_errv) { int size_W = 5 * max(m, n); std::vector hWork(size_W); std::vector A(lda * n * bc); // input data initialization gesvd_initData(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A); // execute computations: // complementary execution to compute all singular vectors if needed (always in-place to ensure // we don't combine results computed by gemm_batched with results computed by gemm_strided_batched) CHECK_ROCBLAS_ERROR(hipsolver_gesvd(API, NRWK, handle, left_svectT, right_svectT, mT, nT, dA.data(), lda, stA, dS.data(), stS, dUT.data(), lduT, stUT, dVT.data(), ldvT, stVT, dWork.data(), lwork, dE.data(), stE, dinfo.data(), bc)); if(left_svect == 'N' && right_svect != 'N') CHECK_HIP_ERROR(Ures.transfer_from(dUT)); if(right_svect == 'N' && left_svect != 'N') CHECK_HIP_ERROR(Vres.transfer_from(dVT)); gesvd_initData(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A); // CPU lapack for(int b = 0; b < bc; ++b) cblas_gesvd(left_svect, right_svect, m, n, hA[b], lda, hS[b], hU[b], ldu, hV[b], ldv, hWork.data(), size_W, hE[b], hinfo[b]); // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_gesvd(API, NRWK, handle, left_svect, right_svect, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), lwork, dE.data(), stE, dinfo.data(), bc)); CHECK_HIP_ERROR(hSres.transfer_from(dS)); CHECK_HIP_ERROR(hEres.transfer_from(dE)); CHECK_HIP_ERROR(hinfoRes.transfer_from(dinfo)); if(left_svect == 'S' || left_svect == 'A') CHECK_HIP_ERROR(Ures.transfer_from(dU)); if(right_svect == 'S' || right_svect == 'A') CHECK_HIP_ERROR(Vres.transfer_from(dV)); if(left_svect == 'O') { CHECK_HIP_ERROR(hA.transfer_from(dA)); for(int b = 0; b < bc; ++b) { for(int i = 0; i < m; i++) { for(int j = 0; j < min(m, n); j++) Ures[b][i + j * ldures] = hA[b][i + j * lda]; } } } if(right_svect == 'O') { 
CHECK_HIP_ERROR(hA.transfer_from(dA)); for(int b = 0; b < bc; ++b) { for(int i = 0; i < min(m, n); i++) { for(int j = 0; j < n; j++) Vres[b][i + j * ldvres] = hA[b][i + j * lda]; } } } // Check info for non-convergence *max_err = 0; for(int b = 0; b < bc; ++b) if(hinfo[b][0] != hinfoRes[b][0]) *max_err += 1; // (We expect the used input matrices to always converge. Testing // implicitly the equivalent non-converged matrix is very complicated and it boils // down to essentially run the algorithm again and until convergence is achieved). double err; *max_errv = 0; for(int b = 0; b < bc; ++b) { // error is ||hS - hSres|| err = norm_error('F', 1, min(m, n), 1, hS[b], hSres[b]); *max_err = err > *max_err ? err : *max_err; // Check the singular vectors if required if(hinfo[b][0] == 0 && (left_svect != 'N' || right_svect != 'N')) { err = 0; // check singular vectors implicitly (A*v_k = s_k*u_k) for(int k = 0; k < min(m, n); ++k) { for(int i = 0; i < m; ++i) { T tmp = 0; for(int j = 0; j < n; ++j) tmp += A[b * lda * n + i + j * lda] * std::conj(Vres[b][k + j * ldvres]); tmp -= hSres[b][k] * Ures[b][i + k * ldures]; err += std::abs(tmp) * std::abs(tmp); } } err = std::sqrt(err) / double(snorm('F', m, n, A.data() + b * lda * n, lda)); *max_errv = err > *max_errv ? 
err : *max_errv; } } } template void gesvd_getPerfData(const hipsolverHandle_t handle, const char left_svect, const char right_svect, const int m, const int n, Wd& dA, const int lda, const int stA, Td& dS, const int stS, Ud& dU, const int ldu, const int stU, Ud& dV, const int ldv, const int stV, Ud& dWork, const int lwork, Td& dE, const int stE, Id& dinfo, const int bc, Wh& hA, Th& hS, Uh& hU, Uh& hV, Th& hE, Ih& hinfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { int size_W = 5 * max(m, n); std::vector hWork(size_W); std::vector A; if(!perf) { gesvd_initData( handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, 0); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_gesvd(left_svect, right_svect, m, n, hA[b], lda, hS[b], hU[b], ldu, hV[b], ldv, hWork.data(), size_W, hE[b], hinfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } gesvd_initData(handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, 0); // cold calls for(int iter = 0; iter < 2; iter++) { gesvd_initData( handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, 0); CHECK_ROCBLAS_ERROR(hipsolver_gesvd(API, NRWK, handle, left_svect, right_svect, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), lwork, dE.data(), stE, dinfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { gesvd_initData( handle, left_svect, right_svect, m, n, dA, lda, bc, hA, A, 0); start = get_time_us_sync(stream); hipsolver_gesvd(API, NRWK, handle, left_svect, right_svect, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), lwork, dE.data(), stE, dinfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void 
testing_gesvd(Arguments& argus) { using S = decltype(std::real(T{})); // get arguments hipsolver_local_handle handle; char leftv = argus.get("jobu"); char rightv = argus.get("jobv"); int m = argus.get("m"); int n = argus.get("n", m); int lda = argus.get("lda", m); int ldu = argus.get("ldu", m); int ldv = argus.get("ldv", (rightv == 'A' ? n : min(m, n))); int stA = argus.get("strideA", lda * n); int stS = argus.get("strideS", min(m, n)); int stU = argus.get("strideU", ldu * m); int stV = argus.get("strideV", ldv * n); int stE = argus.get("strideE", min(m, n) - 1); int bc = argus.batch_count; int hot_calls = argus.iters; // check non-supported values if(rightv == 'O' && leftv == 'O') { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, // NRWK, // handle, // leftv, // rightv, // m, // n, // (T* const*)nullptr, // lda, // stA, // (S*)nullptr, // stS, // (T*)nullptr, // ldu, // stU, // (T*)nullptr, // ldv, // stV, // (T*)nullptr, // 0, // (S*)nullptr, // stE, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, NRWK, handle, leftv, rightv, m, n, (T*)nullptr, lda, stA, (S*)nullptr, stS, (T*)nullptr, ldu, stU, (T*)nullptr, ldv, stV, (T*)nullptr, 0, (S*)nullptr, stE, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_args); return; } /** TESTING OF SINGULAR VECTORS IS DONE IMPLICITLY, NOT EXPLICITLY COMPARING WITH LAPACK. SO, WE ALWAYS NEED TO COMPUTE THE SAME NUMBER OF ELEMENTS OF THE RIGHT AND LEFT VECTORS. 
WHILE DOING THIS, IF MORE VECTORS THAN THE SPECIFIED IN THE MAIN CALL NEED TO BE COMPUTED, WE DO SO WITH AN EXTRA CALL **/ signed char leftvT = 'N'; signed char rightvT = 'N'; int ldvT = 1; int lduT = 1; int mT = 0; int nT = 0; bool svects = (leftv != 'N' || rightv != 'N'); if(svects) { if(leftv == 'N') { leftvT = 'A'; lduT = m; mT = m; nT = n; // if((n > m && fa == rocblas_outofplace) || (n > m && rightv == 'O')) // rightvT = 'O'; } if(rightv == 'N') { rightvT = 'A'; ldvT = n; mT = m; nT = n; // if((m >= n && fa == rocblas_outofplace) || (m >= n && leftv == 'O')) // leftvT = 'O'; } } // determine sizes int ldures = 1; int ldvres = 1; size_t size_Sres = 0; size_t size_Eres = 0; size_t size_Ures = 0; size_t size_Vres = 0; size_t size_UT = 0; size_t size_VT = 0; size_t size_A = size_t(lda) * n; size_t size_S = size_t(min(m, n)); size_t size_E = size_t(min(m, n) - 1); size_t size_V = size_t(ldv) * n; size_t size_U = size_t(ldu) * m; if(argus.unit_check || argus.norm_check) { size_VT = size_t(ldvT) * nT; size_UT = size_t(lduT) * mT; size_Sres = size_S; size_Eres = size_E; if(svects) { if(leftv == 'N') { size_Ures = size_UT; ldures = lduT; } else if(leftv == 'S' || leftv == 'A') { size_Ures = size_U; ldures = ldu; } else { size_Ures = m * m; ldures = m; } if(rightv == 'N') { size_Vres = size_VT; ldvres = ldvT; } else if(rightv == 'S' || rightv == 'A') { size_Vres = size_V; ldvres = ldv; } else { size_Vres = n * n; ldvres = n; } } } int stUT = size_UT; int stVT = size_VT; int stUres = size_Ures; int stVres = size_Vres; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0, max_errorv = 0; // check invalid sizes bool invalid_size = (n < 0 || m < 0 || lda < m || ldu < 1 || ldv < 1 || bc < 0) || ((leftv == 'A' || leftv == 'S') && ldu < m) || ((rightv == 'A' && ldv < n) || (rightv == 'S' && ldv < min(m, n))); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, // NRWK, // handle, // leftv, // rightv, // m, // n, // (T* const*)nullptr, // lda, 
// stA, // (S*)nullptr, // stS, // (T*)nullptr, // ldu, // stU, // (T*)nullptr, // ldv, // stV, // (T*)nullptr, // 0, // (S*)nullptr, // stE, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_gesvd(API, NRWK, handle, leftv, rightv, m, n, (T*)nullptr, lda, stA, (S*)nullptr, stS, (T*)nullptr, ldu, stU, (T*)nullptr, ldv, stV, (T*)nullptr, 0, (S*)nullptr, stE, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W, w1, w2; hipsolver_gesvd_bufferSize(API, handle, leftv, rightv, m, n, (T*)nullptr, lda, &w1); hipsolver_gesvd_bufferSize(API, handle, leftvT, rightvT, mT, nT, (T*)nullptr, lda, &w2); size_W = max(w1, w2); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations (all cases) // host host_strided_batch_vector hE(5 * max(m, n), 1, 5 * max(m, n), bc); host_strided_batch_vector hS(size_S, 1, stS, bc); host_strided_batch_vector hV(size_V, 1, stV, bc); host_strided_batch_vector hU(size_U, 1, stU, bc); host_strided_batch_vector hinfo(1, 1, 1, bc); host_strided_batch_vector hinfoRes(1, 1, 1, bc); host_strided_batch_vector hSres(size_Sres, 1, stS, bc); host_strided_batch_vector hEres(size_Eres, 1, stE, bc); host_strided_batch_vector Vres(size_Vres, 1, stVres, bc); host_strided_batch_vector Ures(size_Ures, 1, stUres, bc); // device device_strided_batch_vector dE(size_E, 1, stE, bc); device_strided_batch_vector dS(size_S, 1, stS, bc); device_strided_batch_vector dV(size_V, 1, stV, bc); device_strided_batch_vector dU(size_U, 1, stU, bc); device_strided_batch_vector dinfo(1, 1, 1, bc); device_strided_batch_vector dVT(size_VT, 1, stVT, bc); device_strided_batch_vector dUT(size_UT, 1, stUT, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_VT) CHECK_HIP_ERROR(dVT.memcheck()); if(size_UT) CHECK_HIP_ERROR(dUT.memcheck()); if(size_E) 
CHECK_HIP_ERROR(dE.memcheck()); if(size_S) CHECK_HIP_ERROR(dS.memcheck()); if(size_V) CHECK_HIP_ERROR(dV.memcheck()); if(size_U) CHECK_HIP_ERROR(dU.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // device_batch_vector dA(size_A, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // { // gesvd_getError(handle, // leftv, // rightv, // m, // n, // dA, // lda, // stA, // dS, // stS, // dU, // ldu, // stU, // dV, // ldv, // stV, // dWork, // size_W, // dE, // stE, // dinfo, // bc, // leftvT, // rightvT, // mT, // nT, // dUT, // lduT, // stUT, // dVT, // ldvT, // stVT, // hA, // hS, // hSres, // hU, // Ures, // ldures, // hV, // Vres, // ldvres, // hE, // hEres, // hinfo, // hinfoRes, // &max_error, // &max_errorv); // } // // collect performance data // if(argus.timing) // { // gesvd_getPerfData(handle, // leftv, // rightv, // m, // n, // dA, // lda, // stA, // dS, // stS, // dU, // ldu, // stU, // dV, // ldv, // stV, // dWork, // size_W, // dE, // stE, // dinfo, // bc, // hA, // hS, // hU, // hV, // hE, // hinfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); // } } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); // check computations if(argus.unit_check || argus.norm_check) { gesvd_getError(handle, leftv, rightv, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, size_W, dE, stE, dinfo, bc, leftvT, rightvT, mT, nT, dUT, lduT, stUT, dVT, ldvT, stVT, hA, hS, hSres, hU, Ures, ldures, hV, Vres, ldvres, hE, hEres, hinfo, hinfoRes, &max_error, &max_errorv); } // collect performance data if(argus.timing) { gesvd_getPerfData(handle, leftv, rightv, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, size_W, dE, 
stE, dinfo, bc, hA, hS, hU, hV, hE, hinfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } } // validate results for rocsolver-test // using 2 * min(m, n) * machine_precision as tolerance if(argus.unit_check) { ROCSOLVER_TEST_CHECK(T, max_error, 2 * min(m, n)); if(svects) ROCSOLVER_TEST_CHECK(T, max_errorv, 2 * min(m, n)); } // output results for rocsolver-bench if(argus.timing) { if(svects) max_error = (max_error >= max_errorv) ? max_error : max_errorv; if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("jobu", "jobv", "m", "n", "lda", "strideS", "ldu", "strideU", "ldv", "strideV", "strideE", "batch_c"); rocsolver_bench_output(leftv, rightv, m, n, lda, stS, ldu, stU, ldv, stV, stE, bc); } else if(STRIDED) { rocsolver_bench_output("jobu", "jobv", "m", "n", "lda", "strideA", "strideS", "ldu", "strideU", "ldv", "strideV", "strideE", "batch_c"); rocsolver_bench_output( leftv, rightv, m, n, lda, stA, stS, ldu, stU, ldv, stV, stE, bc); } else { rocsolver_bench_output("jobu", "jobv", "m", "n", "lda", "ldu", "ldv"); rocsolver_bench_output(leftv, rightv, m, n, lda, ldu, ldv); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } 
hipSOLVER-rocm-5.5.1/clients/include/testing_gesvda.hpp000066400000000000000000001340721436107207300230100ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void gesvda_checkBadArgs(const hipsolverHandle_t handle, hipsolverEigMode_t jobz, const int rank, const int m, const int n, W dA, const int lda, const int stA, TT dS, const int stS, T dU, const int ldu, const int stU, T dV, const int ldv, const int stV, T dWork, const int lwork, U dinfo, double* hRnrmF, const int bc) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // handle EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, STRIDED, nullptr, jobz, rank, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, hRnrmF, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, STRIDED, handle, hipsolverEigMode_t(-1), rank, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, hRnrmF, bc), HIPSOLVER_STATUS_INVALID_ENUM); // pointers EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, (W) nullptr, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, hRnrmF, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, dA, lda, stA, (TT) nullptr, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, hRnrmF, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, dA, lda, stA, dS, stS, (T) nullptr, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, hRnrmF, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, (T) nullptr, ldv, stV, dWork, lwork, dinfo, hRnrmF, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, (U) nullptr, hRnrmF, bc), HIPSOLVER_STATUS_INVALID_VALUE); 
#endif } template void testing_gesvda_bad_arg() { using S = decltype(std::real(T{})); // safe arguments hipsolver_local_handle handle; hipsolverEigMode_t jobz = HIPSOLVER_EIG_MODE_VECTOR; int rank = 1; int m = 2; int n = 2; int lda = 2; int ldu = 2; int ldv = 2; int stA = 2; int stS = 2; int stU = 2; int stV = 2; int bc = 1; if(BATCHED) { // // memory allocations // host_strided_batch_vector hRnrmF(1, 1, 1, 1); // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dS(1, 1, 1, 1); // device_strided_batch_vector dU(1, 1, 1, 1); // device_strided_batch_vector dV(1, 1, 1, 1); // device_strided_batch_vector dinfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dS.memcheck()); // CHECK_HIP_ERROR(dU.memcheck()); // CHECK_HIP_ERROR(dV.memcheck()); // CHECK_HIP_ERROR(dinfo.memcheck()); // int size_W; // hipsolver_gesvda_bufferSize(API, // STRIDED, // handle, // jobz, // rank, // m, // n, // dA.data(), // lda, // stA, // dS.data(), // stS, // dU.data(), // ldu, // stU, // dV.data(), // ldv, // stV, // &size_W, // bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // gesvda_checkBadArgs(handle, // jobz, // rank, // m, // n, // dA.data(), // lda, // stA, // dS.data(), // stS, // dU.data(), // ldu, // stU, // dV.data(), // ldv, // stV, // dWork.data(), // size_W, // dinfo.data(), // hRnrmF.data(), // bc); } else { // memory allocations host_strided_batch_vector hRnrmF(1, 1, 1, 1); device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dS(1, 1, 1, 1); device_strided_batch_vector dU(1, 1, 1, 1); device_strided_batch_vector dV(1, 1, 1, 1); device_strided_batch_vector dinfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dS.memcheck()); CHECK_HIP_ERROR(dU.memcheck()); CHECK_HIP_ERROR(dV.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); int size_W; hipsolver_gesvda_bufferSize(API, STRIDED, handle, jobz, rank, m, n, dA.data(), lda, 
stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, &size_W, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments gesvda_checkBadArgs(handle, jobz, rank, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), size_W, dinfo.data(), hRnrmF, bc); } } template void gesvda_initData(const hipsolverHandle_t handle, hipsolverEigMode_t jobz, const int m, const int n, Td& dA, const int lda, const int bc, Th& hA, std::vector& A, bool test = true) { if(CPU) { rocblas_init(hA, true); for(int b = 0; b < bc; ++b) { // scale A to avoid singularities for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } // make copy of original data to test vectors if required if(test && jobz != HIPSOLVER_EIG_MODE_NOVECTOR) { for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) A[b * lda * n + i + j * lda] = hA[b][i + j * lda]; } } } } if(GPU) { // now copy to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void gesvda_getError(const hipsolverHandle_t handle, hipsolverEigMode_t jobz, const int rank, const int m, const int n, Wd& dA, const int lda, const int stA, Td& dS, const int stS, Ud& dU, const int ldu, const int stU, Ud& dV, const int ldv, const int stV, Ud& dWork, const int lwork, Id& dinfo, double* hRnrmF, const int bc, Wh& hA, Th& hS, Th& hSres, Uh& hUres, Uh& hVres, Ih& hinfo, Ih& hinfoRes, double* max_err, double* max_errv) { /** WORKAROUND: Due to errors in gesvdx, we will call gesvd to get all the singular values on the CPU side and use a subset of them for comparison. This approach has 2 disadvantages: 1. singular values are not computed to the same accuracy by gesvd and gesvda. So, comparison maybe more sensitive. 2. info cannot be tested as it has a different meaning in gesvd 3. 
we cannot provide timing for CPU execution using gesvd when testing gesvda **/ // (TODO: We may revisit the entire approach in the future: change to another solution, // or wait for problems with gesvdx_ to be fixed) using S = decltype(std::real(T{})); int size_W = 5 * max(m, n); std::vector hE(size_W); std::vector hWork(size_W); std::vector A(lda * n * bc); // input data initialization gesvda_initData(handle, jobz, m, n, dA, lda, bc, hA, A); // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), lwork, dinfo.data(), hRnrmF, bc)); CHECK_HIP_ERROR(hSres.transfer_from(dS)); CHECK_HIP_ERROR(hinfoRes.transfer_from(dinfo)); if(jobz != HIPSOLVER_EIG_MODE_NOVECTOR) { CHECK_HIP_ERROR(hUres.transfer_from(dU)); CHECK_HIP_ERROR(hVres.transfer_from(dV)); } // CPU lapack // Only singular values needed for(int b = 0; b < bc; ++b) cblas_gesvd('N', 'N', m, n, hA[b], lda, hS[b], nullptr, ldu, nullptr, ldv, hWork.data(), size_W, hE.data(), hinfo[b]); // // Check info for non-convergence *max_err = 0; // for(int b = 0; b < bc; ++b) // if(hinfo[b][0] != hinfoRes[b][0]) // *max_err += 1; double err; *max_errv = 0; for(int b = 0; b < bc; ++b) { // error is ||hS - hSres|| err = norm_error('F', 1, rank, 1, hS[b], hSres[b]); *max_err = err > *max_err ? err : *max_err; // Check the singular vectors if required if(hinfoRes[b][0] == 0 && jobz != HIPSOLVER_EIG_MODE_NOVECTOR) { err = 0; // check singular vectors implicitly (A*v_k = s_k*u_k) for(int k = 0; k < rank; ++k) { T tmp = 0; double tmp2 = 0; // (Comparing absolute values to deal with the fact that the pair of singular vectors (u,-v) or (-u,v) are // both ok and we could get either one with the complementary or main executions when only // one side set of vectors is required. May be revisited in the future.) 
for(int i = 0; i < m; ++i) { tmp = 0; for(rocblas_int j = 0; j < n; ++j) tmp += A[b * lda * n + i + j * lda] * hVres[b][j + k * ldv]; tmp2 = std::abs(tmp) - std::abs(hSres[b][k] * hUres[b][i + k * ldu]); err += tmp2 * tmp2; } } err = std::sqrt(err) / double(snorm('F', m, n, A.data() + b * lda * n, lda)); *max_errv = err > *max_errv ? err : *max_errv; } } } template void gesvda_getPerfData(const hipsolverHandle_t handle, hipsolverEigMode_t jobz, const int rank, const int m, const int n, Wd& dA, const int lda, const int stA, Td& dS, const int stS, Ud& dU, const int ldu, const int stU, Ud& dV, const int ldv, const int stV, Ud& dWork, const int lwork, Id& dinfo, double* hRnrmF, const int bc, Wh& hA, Th& hS, Uh& hU, Uh& hV, Ih& hinfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { std::vector A; if(!perf) { // For now we cannot report cpu time due to errors in LAPACK's gesvdx *cpu_time_used = nan(""); } gesvda_initData(handle, jobz, m, n, dA, lda, bc, hA, A, 0); // cold calls for(int iter = 0; iter < 2; iter++) { gesvda_initData(handle, jobz, m, n, dA, lda, bc, hA, A, 0); CHECK_ROCBLAS_ERROR(hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), lwork, dinfo.data(), hRnrmF, bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { gesvda_initData(handle, jobz, m, n, dA, lda, bc, hA, A, 0); start = get_time_us_sync(stream); hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), lwork, dinfo.data(), hRnrmF, bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_gesvda(Arguments& argus) { using S = decltype(std::real(T{})); // get arguments hipsolver_local_handle handle; char 
jobzC = argus.get("jobz"); int rank = argus.get("rank", 1); int m = argus.get("m"); int n = argus.get("n", m); int lda = argus.get("lda", m); int ldu = argus.get("ldu", m); int ldv = argus.get("ldv", n); rocblas_stride stA = argus.get("strideA", lda * n); rocblas_stride stS = argus.get("strideS", min(m, n)); rocblas_stride stU = argus.get("strideU", ldu * min(m, n)); rocblas_stride stV = argus.get("strideV", ldv * min(m, n)); hipsolverEigMode_t jobz = char2hipsolver_evect(jobzC); int bc = argus.batch_count; int hot_calls = argus.iters; rocblas_stride stUres = 0; rocblas_stride stVres = 0; // determine sizes size_t size_A = size_t(lda) * n; size_t size_S = size_t(min(m, n)); size_t size_V = 0; size_t size_U = 0; size_t size_Sres = 0; size_t size_hUres = 0; size_t size_hVres = 0; if(jobz != HIPSOLVER_EIG_MODE_NOVECTOR) { size_U = size_t(ldu) * min(m, n); size_V = size_t(ldv) * min(m, n); } if(argus.unit_check || argus.norm_check) { size_Sres = size_S; size_hUres = size_U; size_hVres = size_V; stUres = stU; stVres = stV; } double max_error = 0, gpu_time_used = 0, cpu_time_used = 0, max_errorv = 0; // check invalid sizes bool invalid_size = (rank <= 0 || rank > min(m, n) || n < 0 || m < 0 || lda < m || ldu < 1 || ldv < 1 || bc < 0) || (jobz != HIPSOLVER_EIG_MODE_NOVECTOR && (ldu < m || ldv < n)); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, // STRIDED, // handle, // jobz, // rank, // m, // n, // (T* const*)nullptr, // lda, // stA, // (S*)nullptr, // stS, // (T*)nullptr, // ldu, // stU, // (T*)nullptr, // ldv, // stV, // (T*)nullptr, // 0, // (int*)nullptr, // (double*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_gesvda(API, STRIDED, handle, jobz, rank, m, n, (T*)nullptr, lda, stA, (S*)nullptr, stS, (T*)nullptr, ldu, stU, (T*)nullptr, ldv, stV, (T*)nullptr, 0, (int*)nullptr, (double*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) 
rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_gesvda_bufferSize(API, STRIDED, handle, jobz, rank, m, n, (T*)nullptr, lda, stA, (S*)nullptr, stS, (T*)nullptr, ldu, stU, (T*)nullptr, ldv, stV, &size_W, bc); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations (all cases) // host host_strided_batch_vector hS(size_S, 1, stS, bc); host_strided_batch_vector hV(size_V, 1, stV, bc); host_strided_batch_vector hU(size_U, 1, stU, bc); host_strided_batch_vector hRnrmF(1, 1, 1, bc); host_strided_batch_vector hinfo(1, 1, 1, bc); host_strided_batch_vector hinfoRes(1, 1, 1, bc); host_strided_batch_vector hSres(size_Sres, 1, stS, bc); host_strided_batch_vector hVres(size_hVres, 1, stVres, bc); host_strided_batch_vector hUres(size_hUres, 1, stUres, bc); // device device_strided_batch_vector dS(size_S, 1, stS, bc); device_strided_batch_vector dV(size_V, 1, stV, bc); device_strided_batch_vector dU(size_U, 1, stU, bc); device_strided_batch_vector dinfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_S) CHECK_HIP_ERROR(dS.memcheck()); if(size_V) CHECK_HIP_ERROR(dV.memcheck()); if(size_U) CHECK_HIP_ERROR(dU.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // device_batch_vector dA(size_A, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // { // gesvda_getError(handle, // jobz, // rank, // m, // n, // dA, // lda, // stA, // dS, // stS, // dU, // ldu, // stU, // dV, // ldv, // stV, // dWork, // size_W, // dinfo, // hRnrmF, // bc, // hA, // hS, // hSres, // hUres, // hVres, // hinfo, // hinfoRes, // &max_error, // &max_errorv); // } // // collect performance data // if(argus.timing) // { // gesvda_getPerfData(handle, // jobz, // rank, // m, // n, 
// dA, // lda, // stA, // dS, // stS, // dU, // ldu, // stU, // dV, // ldv, // stV, // dWork, // size_W, // dinfo, // hRnrmF, // bc, // hA, // hS, // hU, // hV, // hinfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); // } } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); // check computations if(argus.unit_check || argus.norm_check) { gesvda_getError(handle, jobz, rank, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, size_W, dinfo, hRnrmF, bc, hA, hS, hSres, hUres, hVres, hinfo, hinfoRes, &max_error, &max_errorv); } // collect performance data if(argus.timing) { gesvda_getPerfData(handle, jobz, rank, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, size_W, dinfo, hRnrmF, bc, hA, hS, hU, hV, hinfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } } // validate results for rocsolver-test // using 3 * min(m, n) * machine_precision as tolerance if(argus.unit_check) { ROCSOLVER_TEST_CHECK(T, max_error, 3 * min(m, n)); if(jobz != HIPSOLVER_EIG_MODE_NOVECTOR) ROCSOLVER_TEST_CHECK(T, max_errorv, 3 * min(m, n)); } // output results for rocsolver-bench if(argus.timing) { if(jobz != HIPSOLVER_EIG_MODE_NOVECTOR) max_error = (max_error >= max_errorv) ? 
max_error : max_errorv; if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("jobz", "rank", "m", "n", "lda", "ldu", "ldv", "batch_c"); rocsolver_bench_output(jobz, rank, m, n, lda, ldu, ldv, bc); } else if(STRIDED) { rocsolver_bench_output("jobz", "rank", "m", "n", "lda", "strideA", "strideS", "ldu", "strideU", "ldv", "strideV", "batch_c"); rocsolver_bench_output(jobz, rank, m, n, lda, stA, stS, ldu, stU, ldv, stV, bc); } else { rocsolver_bench_output("jobz", "rank", "m", "n", "lda", "ldu", "ldv"); rocsolver_bench_output(jobz, rank, m, n, lda, ldu, ldv); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_gesvdj.hpp000066400000000000000000001422551436107207300230230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void gesvdj_checkBadArgs(const hipsolverHandle_t handle, hipsolverEigMode_t jobz, const int econ, const int m, const int n, W dA, const int lda, const int stA, TT dS, const int stS, T dU, const int ldu, const int stU, T dV, const int ldv, const int stV, T dWork, const int lwork, U dinfo, const hipsolverGesvdjInfo_t params, const int bc) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // handle EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, STRIDED, nullptr, jobz, econ, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, params, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, STRIDED, handle, hipsolverEigMode_t(-1), econ, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, params, bc), HIPSOLVER_STATUS_INVALID_ENUM); // pointers EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, (W) nullptr, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, params, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, dA, lda, stA, (TT) nullptr, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, params, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, dA, lda, stA, dS, stS, (T) nullptr, ldu, stU, dV, ldv, stV, dWork, lwork, dinfo, params, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, (T) nullptr, ldv, stV, dWork, lwork, dinfo, params, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, lwork, (U) nullptr, params, bc), 
HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_gesvdj_bad_arg() { using S = decltype(std::real(T{})); // safe arguments hipsolver_local_handle handle; hipsolver_local_gesvdj_info params; hipsolverEigMode_t jobz = HIPSOLVER_EIG_MODE_VECTOR; int econ = 0; int m = 2; int n = 2; int lda = 2; int ldu = 2; int ldv = 2; int stA = 2; int stS = 2; int stU = 2; int stV = 2; int bc = 1; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dS(1, 1, 1, 1); // device_strided_batch_vector dU(1, 1, 1, 1); // device_strided_batch_vector dV(1, 1, 1, 1); // device_strided_batch_vector dinfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dS.memcheck()); // CHECK_HIP_ERROR(dU.memcheck()); // CHECK_HIP_ERROR(dV.memcheck()); // CHECK_HIP_ERROR(dinfo.memcheck()); // int size_W; // hipsolver_gesvdj_bufferSize(API, // STRIDED, // handle, // jobz, // econ, // m, // n, // dA.data(), // lda, // dS.data(), // dU.data(), // ldu, // dV.data(), // ldv, // &size_W, // params, // bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // gesvdj_checkBadArgs(handle, // jobz, // econ, // m, // n, // dA.data(), // lda, // stA, // dS.data(), // stS, // dU.data(), // ldu, // stU, // dV.data(), // ldv, // stV, // dWork.data(), // size_W, // dinfo.data(), // params, // bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dS(1, 1, 1, 1); device_strided_batch_vector dU(1, 1, 1, 1); device_strided_batch_vector dV(1, 1, 1, 1); device_strided_batch_vector dinfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dS.memcheck()); CHECK_HIP_ERROR(dU.memcheck()); CHECK_HIP_ERROR(dV.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); int size_W; hipsolver_gesvdj_bufferSize(API, STRIDED, handle, jobz, econ, m, n, dA.data(), lda, dS.data(), dU.data(), ldu, dV.data(), ldv, &size_W, 
params, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments gesvdj_checkBadArgs(handle, jobz, econ, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), size_W, dinfo.data(), params, bc); } } template void gesvdj_initData(const hipsolverHandle_t handle, hipsolverEigMode_t jobz, const int m, const int n, Td& dA, const int lda, const int bc, Th& hA, std::vector& A, bool test = true) { if(CPU) { rocblas_init(hA, true); for(int b = 0; b < bc; ++b) { // scale A to avoid singularities for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } // make copy of original data to test vectors if required if(test && jobz != HIPSOLVER_EIG_MODE_NOVECTOR) { for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) A[b * lda * n + i + j * lda] = hA[b][i + j * lda]; } } } } if(GPU) { // now copy to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void gesvdj_getError(const hipsolverHandle_t handle, hipsolverEigMode_t jobz, const int econ, const int m, const int n, Wd& dA, const int lda, const int stA, Td& dS, const int stS, Ud& dU, const int ldu, const int stU, Ud& dV, const int ldv, const int stV, Ud& dWork, const int lwork, Id& dinfo, hipsolverGesvdjInfo_t params, const double abstol, const int max_sweeps, const int sort_eig, const int bc, Wh& hA, Th& hS, Th& hSres, Uh& Ures, Uh& Vres, Ih& hinfo, Ih& hinfoRes, Vh& hResidualRes, Ih& hSweepsRes, double* max_err, double* max_errv) { using S = decltype(std::real(T{})); int size_W = 5 * max(m, n); std::vector hE(size_W); std::vector hWork(size_W); std::vector A(lda * n * bc); // input data initialization gesvdj_initData(handle, jobz, m, n, dA, lda, bc, hA, A); // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, 
dWork.data(), lwork, dinfo.data(), params, bc)); CHECK_HIP_ERROR(hSres.transfer_from(dS)); CHECK_HIP_ERROR(hinfoRes.transfer_from(dinfo)); if(jobz != HIPSOLVER_EIG_MODE_NOVECTOR) { CHECK_HIP_ERROR(Ures.transfer_from(dU)); CHECK_HIP_ERROR(Vres.transfer_from(dV)); } hipsolverXgesvdjGetResidual(handle, params, hResidualRes.data()); hipsolverXgesvdjGetSweeps(handle, params, hSweepsRes.data()); // CPU lapack // Only singular values needed for(int b = 0; b < bc; ++b) cblas_gesvd('N', 'N', m, n, hA[b], lda, hS[b], nullptr, ldu, nullptr, ldv, hWork.data(), size_W, hE.data(), hinfo[b]); // Check info for non-convergence *max_err = 0; for(int b = 0; b < bc; ++b) if(hinfo[b][0] != hinfoRes[b][0]) *max_err += 1; if(!STRIDED) { // Also check validity of residual for(rocblas_int b = 0; b < bc; ++b) if(hResidualRes[b][0] < 0) *max_err += 1; // Also check validity of sweeps for(rocblas_int b = 0; b < bc; ++b) if(hSweepsRes[b][0] < 0 || hSweepsRes[b][0] > max_sweeps) *max_err += 1; } // (We expect the used input matrices to always converge. Testing // implicitly the equivalent non-converged matrix is very complicated and it boils // down to essentially run the algorithm again and until convergence is achieved). double err = 0; *max_errv = 0; for(int b = 0; b < bc; ++b) { // error is ||hS - hSres|| if(sort_eig) err = norm_error('F', 1, min(m, n), 1, hS[b], hSres[b]); *max_err = err > *max_err ? err : *max_err; // Check the singular vectors if required if(hinfo[b][0] == 0 && jobz != HIPSOLVER_EIG_MODE_NOVECTOR) { err = 0; // check singular vectors implicitly (A*v_k = s_k*u_k) for(int k = 0; k < min(m, n); ++k) { for(int i = 0; i < m; ++i) { T tmp = 0; for(int j = 0; j < n; ++j) tmp += A[b * lda * n + i + j * lda] * Vres[b][j + k * ldv]; tmp -= hSres[b][k] * Ures[b][i + k * ldu]; err += std::abs(tmp) * std::abs(tmp); } } err = std::sqrt(err) / double(snorm('F', m, n, A.data() + b * lda * n, lda)); *max_errv = err > *max_errv ? 
err : *max_errv; } } } template void gesvdj_getPerfData(const hipsolverHandle_t handle, hipsolverEigMode_t jobz, const int econ, const int m, const int n, Wd& dA, const int lda, const int stA, Td& dS, const int stS, Ud& dU, const int ldu, const int stU, Ud& dV, const int ldv, const int stV, Ud& dWork, const int lwork, Id& dinfo, hipsolverGesvdjInfo_t params, const int bc, Wh& hA, Th& hS, Uh& hU, Uh& hV, Ih& hinfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { using S = decltype(std::real(T{})); int size_W = 5 * max(m, n); std::vector hE(size_W); std::vector hWork(size_W); std::vector A; char svect = (jobz == HIPSOLVER_EIG_MODE_NOVECTOR ? 'N' : (econ == 0 ? 'A' : 'S')); int ldv_trans = (jobz == HIPSOLVER_EIG_MODE_NOVECTOR ? 1 : (econ == 0 ? n : min(m, n))); if(!perf) { gesvdj_initData(handle, jobz, m, n, dA, lda, bc, hA, A, 0); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_gesvd(svect, svect, m, n, hA[b], lda, hS[b], hU[b], ldu, hV[b], ldv_trans, hWork.data(), size_W, hE.data(), hinfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } gesvdj_initData(handle, jobz, m, n, dA, lda, bc, hA, A, 0); // cold calls for(int iter = 0; iter < 2; iter++) { gesvdj_initData(handle, jobz, m, n, dA, lda, bc, hA, A, 0); CHECK_ROCBLAS_ERROR(hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), lwork, dinfo.data(), params, bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { gesvdj_initData(handle, jobz, m, n, dA, lda, bc, hA, A, 0); start = get_time_us_sync(stream); hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, dA.data(), lda, stA, dS.data(), stS, dU.data(), ldu, stU, dV.data(), ldv, stV, dWork.data(), lwork, dinfo.data(), params, 
bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_gesvdj(Arguments& argus) { using S = decltype(std::real(T{})); // get arguments hipsolver_local_handle handle; hipsolver_local_gesvdj_info params; char jobzC = argus.get("jobz"); int econ = !STRIDED ? argus.get("econ", 0) : 0; int m = argus.get("m"); int n = argus.get("n", m); int lda = argus.get("lda", m); int ldu = argus.get("ldu", m); int ldv = argus.get("ldv", n); int stA = lda * n; int stS = min(m, n); int stU = ldu * (econ ? min(m, n) : m); int stV = ldv * (econ ? min(m, n) : n); double abstol = argus.get("tolerance", 2 * get_epsilon()); rocblas_int max_sweeps = argus.get("max_sweeps", 100); rocblas_int sort_eig = argus.get("sort_eig", 1); hipsolverXgesvdjSetTolerance(params, abstol); hipsolverXgesvdjSetMaxSweeps(params, max_sweeps); hipsolverXgesvdjSetSortEig(params, sort_eig); hipsolverEigMode_t jobz = char2hipsolver_evect(jobzC); int bc = argus.batch_count; int hot_calls = argus.iters; rocblas_stride stUres = 0; rocblas_stride stVres = 0; // determine sizes size_t size_A = size_t(lda) * n; size_t size_S = size_t(min(m, n)); size_t size_V = 0; size_t size_U = 0; size_t size_Sres = 0; size_t size_Ures = 0; size_t size_Vres = 0; if(jobz != HIPSOLVER_EIG_MODE_NOVECTOR) { size_U = size_t(ldu) * (econ ? min(m, n) : m); size_V = size_t(ldv) * (econ ? 
min(m, n) : n); } if(argus.unit_check || argus.norm_check) { size_Sres = size_S; size_Ures = size_U; size_Vres = size_V; stUres = stU; stVres = stV; } double max_error = 0, gpu_time_used = 0, cpu_time_used = 0, max_errorv = 0; // check invalid sizes bool invalid_size = (n < 0 || m < 0 || lda < m || ldu < 1 || ldv < 1 || bc < 0) || (jobz != HIPSOLVER_EIG_MODE_NOVECTOR && (ldu < m || ldv < n)); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, // STRIDED, // handle, // jobz, // econ, // m, // n, // (T* const*)nullptr, // lda, // stA, // (S*)nullptr, // stS, // (T*)nullptr, // ldu, // stU, // (T*)nullptr, // ldv, // stV, // (T*)nullptr, // 0, // (int*)nullptr, // params, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_gesvdj(API, STRIDED, handle, jobz, econ, m, n, (T*)nullptr, lda, stA, (S*)nullptr, stS, (T*)nullptr, ldu, stU, (T*)nullptr, ldv, stV, (T*)nullptr, 0, (int*)nullptr, params, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_gesvdj_bufferSize(API, STRIDED, handle, jobz, econ, m, n, (T*)nullptr, lda, (S*)nullptr, (T*)nullptr, ldu, (T*)nullptr, ldv, &size_W, params, bc); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations (all cases) // host host_strided_batch_vector hResidualRes(1, 1, 1, bc); host_strided_batch_vector hSweepsRes(1, 1, 1, bc); host_strided_batch_vector hS(size_S, 1, stS, bc); host_strided_batch_vector hV(size_V, 1, stV, bc); host_strided_batch_vector hU(size_U, 1, stU, bc); host_strided_batch_vector hinfo(1, 1, 1, bc); host_strided_batch_vector hinfoRes(1, 1, 1, bc); host_strided_batch_vector hSres(size_Sres, 1, stS, bc); host_strided_batch_vector Vres(size_Vres, 1, stVres, bc); host_strided_batch_vector Ures(size_Ures, 1, stUres, bc); // device device_strided_batch_vector dS(size_S, 1, stS, bc); 
device_strided_batch_vector dV(size_V, 1, stV, bc); device_strided_batch_vector dU(size_U, 1, stU, bc); device_strided_batch_vector dinfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_S) CHECK_HIP_ERROR(dS.memcheck()); if(size_V) CHECK_HIP_ERROR(dV.memcheck()); if(size_U) CHECK_HIP_ERROR(dU.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // device_batch_vector dA(size_A, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // { // gesvdj_getError(handle, // jobz, // econ, // m, // n, // dA, // lda, // stA, // dS, // stS, // dU, // ldu, // stU, // dV, // ldv, // stV, // dWork, // size_W, // dinfo, // params, // abstol, // max_sweeps, // sort_eig, // bc, // hA, // hS, // hSres, // Ures, // Vres, // hinfo, // hinfoRes, // hResidualRes, // hSweepsRes, // &max_error, // &max_errorv); // } // // collect performance data // if(argus.timing) // { // gesvdj_getPerfData(handle, // jobz, // econ, // m, // n, // dA, // lda, // stA, // dS, // stS, // dU, // ldu, // stU, // dV, // ldv, // stV, // dWork, // size_W, // dinfo, // params, // bc, // hA, // hS, // hU, // hV, // hinfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); // } } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); // check computations if(argus.unit_check || argus.norm_check) { gesvdj_getError(handle, jobz, econ, m, n, dA, lda, stA, dS, stS, dU, ldu, stU, dV, ldv, stV, dWork, size_W, dinfo, params, abstol, max_sweeps, sort_eig, bc, hA, hS, hSres, Ures, Vres, hinfo, hinfoRes, hResidualRes, hSweepsRes, &max_error, &max_errorv); } // collect performance data if(argus.timing) { gesvdj_getPerfData(handle, jobz, econ, m, n, dA, lda, stA, dS, stS, 
dU, ldu, stU, dV, ldv, stV, dWork, size_W, dinfo, params, bc, hA, hS, hU, hV, hinfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } } // validate results for rocsolver-test // using 3 * max(m, n) * machine_precision as tolerance if(argus.unit_check) { ROCSOLVER_TEST_CHECK(T, max_error, 3 * max(m, n)); if(jobz != HIPSOLVER_EIG_MODE_NOVECTOR) ROCSOLVER_TEST_CHECK(T, max_errorv, 3 * max(m, n)); } // output results for rocsolver-bench if(argus.timing) { if(jobz != HIPSOLVER_EIG_MODE_NOVECTOR) max_error = (max_error >= max_errorv) ? max_error : max_errorv; if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("jobz", "econ", "m", "n", "lda", "strideS", "ldu", "strideU", "ldv", "strideV", "tolerance", "max_sweeps", "sort_eig", "batch_c"); rocsolver_bench_output(jobz, econ, m, n, lda, stS, ldu, stU, ldv, stV, abstol, max_sweeps, sort_eig, bc); } else if(STRIDED) { rocsolver_bench_output("jobz", "m", "n", "lda", "ldu", "ldv", "tolerance", "max_sweeps", "sort_eig", "batch_c"); rocsolver_bench_output(jobz, m, n, lda, ldu, ldv, abstol, max_sweeps, sort_eig, bc); } else { rocsolver_bench_output("jobz", "econ", "m", "n", "lda", "ldu", "ldv", "tolerance", "max_sweeps", "sort_eig"); rocsolver_bench_output( jobz, econ, m, n, lda, ldu, ldv, abstol, max_sweeps, sort_eig); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else 
rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_getrf.hpp000066400000000000000000000610361436107207300226450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include "clientcommon.hpp" template void getrf_checkBadArgs(const hipsolverHandle_t handle, const int m, const int n, T dA, const int lda, const int stA, U dWork, const int lwork, V dIpiv, const int stP, V dinfo, const int bc) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // handle EXPECT_ROCBLAS_STATUS( hipsolver_getrf( API, false, nullptr, m, n, dA, lda, stA, dWork, lwork, dIpiv, stP, dinfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values // N/A // pointers EXPECT_ROCBLAS_STATUS( hipsolver_getrf( API, false, handle, m, n, (T) nullptr, lda, stA, dWork, lwork, dIpiv, stP, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_getrf( API, false, handle, m, n, dA, lda, stA, dWork, lwork, dIpiv, stP, (V) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_getrf_bad_arg() { // safe arguments hipsolver_local_handle handle; int m = 1; int n = 1; int lda = 1; int stA = 1; int stP = 1; int bc = 1; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dIpiv(1, 1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dIpiv.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // int size_W; // hipsolver_getrf_bufferSize(API, handle, m, n, dA.data(), lda, &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // getrf_checkBadArgs( // handle, m, n, dA.data(), lda, stA, dWork.data(), size_W, dIpiv.data(), stP, dInfo.data(), bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; 
hipsolver_getrf_bufferSize(API, handle, m, n, dA.data(), lda, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments getrf_checkBadArgs(handle, m, n, dA.data(), lda, stA, dWork.data(), size_W, dIpiv.data(), stP, dInfo.data(), bc); } } template void getrf_initData(const hipsolverHandle_t handle, const int m, const int n, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Ud& dInfo, const int bc, Th& hA, Uh& hIpiv, Uh& hInfo) { if(CPU) { T tmp; rocblas_init(hA, true); for(int b = 0; b < bc; ++b) { // scale A to avoid singularities for(int i = 0; i < m; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } if(!NPVT) { // shuffle rows to test pivoting // always the same permuation for debugging purposes for(int i = 0; i < m / 2; i++) { for(int j = 0; j < n; j++) { tmp = hA[b][i + j * lda]; hA[b][i + j * lda] = hA[b][m - 1 - i + j * lda]; hA[b][m - 1 - i + j * lda] = tmp; } } } } } if(GPU) { // now copy data to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void getrf_getError(const hipsolverHandle_t handle, const int m, const int n, Td& dA, const int lda, const int stA, Td& dWork, const int lwork, Ud& dIpiv, const int stP, Ud& dInfo, const int bc, Th& hA, Th& hARes, Uh& hIpiv, Uh& hIpivRes, Uh& hInfo, Uh& hInfoRes, double* max_err) { // input data initialization getrf_initData( handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_getrf(API, NPVT, handle, m, n, dA.data(), lda, stA, dWork.data(), lwork, dIpiv.data(), stP, dInfo.data(), bc)); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hIpivRes.transfer_from(dIpiv)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) cblas_getrf(m, n, hA[b], lda, hIpiv[b], hInfo[b]); // expecting original matrix to be non-singular // error is 
||hA - hARes|| / ||hA|| (ideally ||LU - Lres Ures|| / ||LU||) // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. // IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm double err; *max_err = 0; for(int b = 0; b < bc; ++b) { err = norm_error('F', m, n, lda, hA[b], hARes[b]); *max_err = err > *max_err ? err : *max_err; // also check pivoting (count the number of incorrect pivots) if(!NPVT) { err = 0; for(int i = 0; i < min(m, n); ++i) if(hIpiv[b][i] != hIpivRes[b][i]) err++; *max_err = err > *max_err ? err : *max_err; } } // also check info for singularities err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) err++; *max_err += err; } template void getrf_getPerfData(const hipsolverHandle_t handle, const int m, const int n, Td& dA, const int lda, const int stA, Td& dWork, const int lwork, Ud& dIpiv, const int stP, Ud& dInfo, const int bc, Th& hA, Uh& hIpiv, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { if(!perf) { getrf_initData( handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_getrf(m, n, hA[b], lda, hIpiv[b], hInfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } getrf_initData( handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); // cold calls for(int iter = 0; iter < 2; iter++) { getrf_initData( handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); CHECK_ROCBLAS_ERROR(hipsolver_getrf(API, NPVT, handle, m, n, dA.data(), lda, stA, dWork.data(), lwork, dIpiv.data(), stP, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { getrf_initData( handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); start = get_time_us_sync(stream); 
hipsolver_getrf(API, NPVT, handle, m, n, dA.data(), lda, stA, dWork.data(), lwork, dIpiv.data(), stP, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_getrf(Arguments& argus) { // get arguments hipsolver_local_handle handle; int m = argus.get("m"); int n = argus.get("n", m); int lda = argus.get("lda", m); int stA = argus.get("strideA", lda * n); int stP = argus.get("strideP", min(m, n)); int bc = argus.batch_count; int hot_calls = argus.iters; int stARes = (argus.unit_check || argus.norm_check) ? stA : 0; int stPRes = (argus.unit_check || argus.norm_check) ? stP : 0; // check non-supported values // N/A // determine sizes size_t size_A = size_t(lda) * n; size_t size_P = size_t(min(m, n)); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; size_t size_PRes = (argus.unit_check || argus.norm_check) ? size_P : 0; // check invalid sizes bool invalid_size = (m <= 0 || n <= 0 || lda < m || bc <= 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_getrf(API, // NPVT, // handle, // m, // n, // (T* const*)nullptr, // lda, // stA, // (T*)nullptr, // 0, // (int*)nullptr, // stP, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_getrf(API, NPVT, handle, m, n, (T*)nullptr, lda, stA, (T*)nullptr, 0, (int*)nullptr, stP, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_getrf_bufferSize(API, handle, m, n, (T*)nullptr, lda, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hARes(size_ARes, 1, bc); // host_strided_batch_vector hIpiv(size_P, 1, stP, bc); // host_strided_batch_vector 
hIpivRes(size_PRes, 1, stPRes, bc); // host_strided_batch_vector hInfo(1, 1, 1, bc); // host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_strided_batch_vector dIpiv(size_P, 1, stP, bc); // device_strided_batch_vector dInfo(1, 1, 1, bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // if(size_P) // CHECK_HIP_ERROR(dIpiv.memcheck()); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // getrf_getError(handle, // m, // n, // dA, // lda, // stA, // dWork, // size_W, // dIpiv, // stP, // dInfo, // bc, // hA, // hARes, // hIpiv, // hIpivRes, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // getrf_getPerfData(handle, // m, // n, // dA, // lda, // stA, // dWork, // size_W, // dIpiv, // stP, // dInfo, // bc, // hA, // hIpiv, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); host_strided_batch_vector hIpiv(size_P, 1, stP, bc); host_strided_batch_vector hIpivRes(size_PRes, 1, stPRes, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dIpiv(size_P, 1, stP, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) getrf_getError(handle, m, n, dA, lda, stA, dWork, size_W, dIpiv, stP, dInfo, bc, hA, hARes, hIpiv, hIpivRes, hInfo, hInfoRes, 
&max_error); // collect performance data if(argus.timing) getrf_getPerfData(handle, m, n, dA, lda, stA, dWork, size_W, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using min(m,n) * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, min(m, n)); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("m", "n", "lda", "strideP", "batch_c"); rocsolver_bench_output(m, n, lda, stP, bc); } else if(STRIDED) { rocsolver_bench_output("m", "n", "lda", "strideA", "strideP", "batch_c"); rocsolver_bench_output(m, n, lda, stA, stP, bc); } else { rocsolver_bench_output("m", "n", "lda"); rocsolver_bench_output(m, n, lda); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_getrs.hpp000066400000000000000000001006531436107207300226610ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void getrs_checkBadArgs(const hipsolverHandle_t handle, const hipsolverOperation_t trans, const int m, const int nrhs, T dA, const int lda, const int stA, U dIpiv, const int stP, T dB, const int ldb, const int stB, T dWork, const int lwork, U dInfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS(hipsolver_getrs(API, nullptr, trans, m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_getrs(API, handle, hipsolverOperation_t(-1), m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS(hipsolver_getrs(API, handle, trans, m, nrhs, (T) nullptr, lda, stA, dIpiv, stP, dB, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_getrs(API, handle, trans, m, nrhs, dA, lda, stA, (U) nullptr, stP, dB, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_getrs(API, handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, (T) nullptr, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_getrs(API, handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, dWork, lwork, (U) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_getrs_bad_arg() { // safe arguments hipsolver_local_handle handle; int m = 1; int nrhs = 1; int lda = 1; int ldb = 1; int stA = 1; int stP = 1; int stB = 1; int bc = 1; hipsolverOperation_t trans = HIPSOLVER_OP_N; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_batch_vector dB(1, 1, 1); // device_strided_batch_vector dIpiv(1, 1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); 
// CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dB.memcheck()); // CHECK_HIP_ERROR(dIpiv.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // int size_W; // hipsolver_getrs_bufferSize(API, handle, trans, m, nrhs, dA.data(), lda, dIpiv.data(), dB.data(), ldb, &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // getrs_checkBadArgs(handle, // trans, // m, // nrhs, // dA.data(), // lda, // stA, // dIpiv.data(), // stP, // dB.data(), // ldb, // stB, // dInfo.data(), // bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dB(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_getrs_bufferSize( API, handle, trans, m, nrhs, dA.data(), lda, dIpiv.data(), dB.data(), ldb, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments getrs_checkBadArgs(handle, trans, m, nrhs, dA.data(), lda, stA, dIpiv.data(), stP, dB.data(), ldb, stB, dWork.data(), size_W, dInfo.data(), bc); } } template void getrs_initData(const hipsolverHandle_t handle, const hipsolverOperation_t trans, const int m, const int nrhs, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Td& dB, const int ldb, const int stB, const int bc, Th& hA, Uh& hIpiv, Th& hB) { if(CPU) { rocblas_init(hA, true); rocblas_init(hB, true); // scale A to avoid singularities for(int b = 0; b < bc; ++b) { for(int i = 0; i < m; i++) { for(int j = 0; j < m; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } // do the LU decomposition of matrix A w/ the reference LAPACK routine for(int b = 0; b < bc; ++b) { int info; cblas_getrf(m, m, hA[b], lda, 
hIpiv[b], &info); } } if(GPU) { // now copy pivoting indices and matrices to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); CHECK_HIP_ERROR(dIpiv.transfer_from(hIpiv)); } } template void getrs_getError(const hipsolverHandle_t handle, const hipsolverOperation_t trans, const int m, const int nrhs, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Td& dB, const int ldb, const int stB, Td& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Uh& hIpiv, Th& hB, Th& hBRes, Uh& hInfo, Uh& hInfoRes, double* max_err) { // input data initialization getrs_initData( handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_getrs(API, handle, trans, m, nrhs, dA.data(), lda, stA, dIpiv.data(), stP, dB.data(), ldb, stB, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hBRes.transfer_from(dB)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) { cblas_getrs(trans, m, nrhs, hA[b], lda, hIpiv[b], hB[b], ldb, hInfo[b]); } // error is ||hB - hBRes|| / ||hB|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. // IT MIGHT BE REVISITED IN THE FUTURE) // using vector-induced infinity norm double err; *max_err = 0; for(int b = 0; b < bc; ++b) { err = norm_error('I', m, nrhs, ldb, hB[b], hBRes[b]); *max_err = err > *max_err ? 
err : *max_err; } // check info err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) err++; *max_err += err; } template void getrs_getPerfData(const hipsolverHandle_t handle, const hipsolverOperation_t trans, const int m, const int nrhs, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Td& dB, const int ldb, const int stB, Td& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Uh& hIpiv, Th& hB, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { if(!perf) { getrs_initData( handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) { cblas_getrs(trans, m, nrhs, hA[b], lda, hIpiv[b], hB[b], ldb, hInfo[b]); } *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } getrs_initData( handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); // cold calls for(int iter = 0; iter < 2; iter++) { getrs_initData( handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); CHECK_ROCBLAS_ERROR(hipsolver_getrs(API, handle, trans, m, nrhs, dA.data(), lda, stA, dIpiv.data(), stP, dB.data(), ldb, stB, dWork.data(), lwork, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { getrs_initData( handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, bc, hA, hIpiv, hB); start = get_time_us_sync(stream); hipsolver_getrs(API, handle, trans, m, nrhs, dA.data(), lda, stA, dIpiv.data(), stP, dB.data(), ldb, stB, dWork.data(), lwork, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_getrs(Arguments& argus) { // get arguments hipsolver_local_handle handle; char transC = argus.get("trans"); int m = 
argus.get("n"); int nrhs = argus.get("nrhs", m); int lda = argus.get("lda", m); int ldb = argus.get("ldb", m); int stA = argus.get("strideA", lda * m); int stP = argus.get("strideP", m); int stB = argus.get("strideB", ldb * nrhs); hipsolverOperation_t trans = char2hipsolver_operation(transC); int bc = argus.batch_count; int hot_calls = argus.iters; int stBRes = (argus.unit_check || argus.norm_check) ? stB : 0; // check non-supported values // N/A // determine sizes size_t size_A = size_t(lda) * m; size_t size_B = size_t(ldb) * nrhs; size_t size_P = size_t(m); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_BRes = (argus.unit_check || argus.norm_check) ? size_B : 0; // check invalid sizes bool invalid_size = (m < 0 || nrhs < 0 || lda < m || ldb < m || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_getrs(API, // handle, // trans, // m, // nrhs, // (T* const*)nullptr, // lda, // stA, // (int*)nullptr, // stP, // (T* const*)nullptr, // ldb, // stB, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_getrs(API, handle, trans, m, nrhs, (T*)nullptr, lda, stA, (int*)nullptr, stP, (T*)nullptr, ldb, stB, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_getrs_bufferSize( API, handle, trans, m, nrhs, (T*)nullptr, lda, (int*)nullptr, (T*)nullptr, ldb, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hB(size_B, 1, bc); // host_batch_vector hBRes(size_BRes, 1, bc); // host_strided_batch_vector hIpiv(size_P, 1, stP, bc); // host_strided_batch_vector hInfo(1, 1, 1, bc); // host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device_batch_vector dA(size_A, 1, bc); // 
device_batch_vector dB(size_B, 1, bc); // device_strided_batch_vector dIpiv(size_P, 1, stP, bc); // device_strided_batch_vector dInfo(1, 1, 1, bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // if(size_B) // CHECK_HIP_ERROR(dB.memcheck()); // if(size_P) // CHECK_HIP_ERROR(dIpiv.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // getrs_getError(handle, // trans, // m, // nrhs, // dA, // lda, // stA, // dIpiv, // stP, // dB, // ldb, // stB, // dWork, // size_W, // dInfo, // bc, // hA, // hIpiv, // hB, // hBRes, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // getrs_getPerfData(handle, // trans, // m, // nrhs, // dA, // lda, // stA, // dIpiv, // stP, // dB, // ldb, // stB, // dWork, // size_W, // dInfo, // bc, // hA, // hIpiv, // hB, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hB(size_B, 1, stB, bc); host_strided_batch_vector hBRes(size_BRes, 1, stBRes, bc); host_strided_batch_vector hIpiv(size_P, 1, stP, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dB(size_B, 1, stB, bc); device_strided_batch_vector dIpiv(size_P, 1, stP, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_B) CHECK_HIP_ERROR(dB.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) getrs_getError(handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, 
dB, ldb, stB, dWork, size_W, dInfo, bc, hA, hIpiv, hB, hBRes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) getrs_getPerfData(handle, trans, m, nrhs, dA, lda, stA, dIpiv, stP, dB, ldb, stB, dWork, size_W, dInfo, bc, hA, hIpiv, hB, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using m * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, m); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("trans", "n", "nrhs", "lda", "ldb", "strideP", "batch_c"); rocsolver_bench_output(transC, m, nrhs, lda, ldb, stP, bc); } else if(STRIDED) { rocsolver_bench_output( "trans", "n", "nrhs", "lda", "ldb", "strideA", "strideP", "strideB", "batch_c"); rocsolver_bench_output(transC, m, nrhs, lda, ldb, stA, stP, stB, bc); } else { rocsolver_bench_output("trans", "n", "nrhs", "lda", "ldb"); rocsolver_bench_output(transC, m, nrhs, lda, ldb); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_orgbr_ungbr.hpp000066400000000000000000000456771436107207300240630ustar00rootroot00000000000000/* 
************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void orgbr_ungbr_checkBadArgs(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const int m, const int n, const int k, T dA, const int lda, T dIpiv, T dWork, const int lwork, U dInfo) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_orgbr_ungbr(FORTRAN, nullptr, side, m, n, k, dA, lda, dIpiv, dWork, lwork, dInfo), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS( hipsolver_orgbr_ungbr( FORTRAN, handle, hipsolverSideMode_t(-1), m, n, k, dA, lda, dIpiv, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS( hipsolver_orgbr_ungbr( FORTRAN, handle, side, m, n, k, (T) nullptr, lda, dIpiv, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_orgbr_ungbr( FORTRAN, handle, side, m, n, k, dA, lda, (T) nullptr, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_orgbr_ungbr( FORTRAN, handle, side, m, n, k, dA, lda, dIpiv, dWork, lwork, (U) nullptr), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_orgbr_ungbr_bad_arg() { // safe arguments hipsolver_local_handle handle; hipsolverSideMode_t side = HIPSOLVER_SIDE_LEFT; int k = 1; int m = 1; int n = 1; int lda = 1; // memory allocation device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_orgbr_ungbr_bufferSize( FORTRAN, handle, side, m, n, k, dA.data(), lda, dIpiv.data(), &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments orgbr_ungbr_checkBadArgs( handle, side, m, n, k, dA.data(), lda, dIpiv.data(), 
dWork.data(), size_W, dInfo.data()); } template void orgbr_ungbr_initData(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Th& hA, Th& hIpiv, std::vector& hW, size_t size_W) { if(CPU) { using S = decltype(std::real(T{})); int info; size_t s = max(hIpiv.n(), 2); std::vector E(s - 1); std::vector D(s); std::vector P(s); rocblas_init(hA, true); rocblas_init(hIpiv, true); // scale to avoid singularities // and compute gebrd if(side == HIPSOLVER_SIDE_LEFT) { for(int i = 0; i < m; ++i) { for(int j = 0; j < k; ++j) { if(i == j) hA[0][i + j * lda] += 400; else hA[0][i + j * lda] -= 4; } } cblas_gebrd( m, k, hA[0], lda, D.data(), E.data(), hIpiv[0], P.data(), hW.data(), size_W, &info); } else { for(int i = 0; i < k; ++i) { for(int j = 0; j < n; ++j) { if(i == j) hA[0][i + j * lda] += 400; else hA[0][i + j * lda] -= 4; } } cblas_gebrd( k, n, hA[0], lda, D.data(), E.data(), P.data(), hIpiv[0], hW.data(), size_W, &info); } } if(GPU) { // copy data from CPU to device CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dIpiv.transfer_from(hIpiv)); } } template void orgbr_ungbr_getError(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hARes, Th& hIpiv, Uh& hInfo, Uh& hInfoRes, double* max_err) { size_t size_W = max(max(m, n), k); std::vector hW(size_W); // initialize data orgbr_ungbr_initData( handle, side, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_orgbr_ungbr(FORTRAN, handle, side, m, n, k, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data())); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack cblas_orgbr_ungbr(side, m, n, k, hA[0], lda, hIpiv[0], hW.data(), size_W, hInfo[0]); // error is ||hA - hAr|| / ||hA|| // 
(THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. // IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm *max_err = norm_error('F', m, n, lda, hA[0], hARes[0]); // check info if(hInfo[0][0] != hInfoRes[0][0]) *max_err++; } template void orgbr_ungbr_getPerfData(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hIpiv, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { size_t size_W = max(max(m, n), k); std::vector hW(size_W); if(!perf) { orgbr_ungbr_initData( handle, side, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); cblas_orgbr_ungbr(side, m, n, k, hA[0], lda, hIpiv[0], hW.data(), size_W, hInfo[0]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } orgbr_ungbr_initData( handle, side, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // cold calls for(int iter = 0; iter < 2; iter++) { orgbr_ungbr_initData( handle, side, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); CHECK_ROCBLAS_ERROR(hipsolver_orgbr_ungbr(FORTRAN, handle, side, m, n, k, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data())); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { orgbr_ungbr_initData( handle, side, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); start = get_time_us_sync(stream); hipsolver_orgbr_ungbr(FORTRAN, handle, side, m, n, k, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data()); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_orgbr_ungbr(Arguments& argus) { // get arguments hipsolver_local_handle handle; char sideC = argus.get("side"); int m, n; if(sideC == 'R') { m = 
argus.get("m"); n = argus.get("n", m); } else { n = argus.get("n"); m = argus.get("m", n); } int k = argus.get("k", min(m, n)); int lda = argus.get("lda", m); hipsolverSideMode_t side = char2hipsolver_side(sideC); int hot_calls = argus.iters; // check non-supported values // N/A // determine sizes // size_P could be zero in test cases that are not quick-return or invalid // cases setting it to one to avoid possible memory access errors in the rest // of the unit test bool row = (side == HIPSOLVER_SIDE_RIGHT); size_t size_A = row ? size_t(lda) * n : size_t(lda) * max(n, k); size_t size_P = row ? max(min(n, k), 1) : max(min(m, k), 1); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; // check invalid sizes bool invalid_size = ((m < 0 || n < 0 || k < 0 || lda < m) || (row && (m > n || m < min(n, k))) || (!row && (n > m || n < min(m, k)))); if(invalid_size) { EXPECT_ROCBLAS_STATUS(hipsolver_orgbr_ungbr(FORTRAN, handle, side, m, n, k, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, 0, (int*)nullptr), HIPSOLVER_STATUS_INVALID_VALUE); if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_orgbr_ungbr_bufferSize( FORTRAN, handle, side, m, n, k, (T*)nullptr, lda, (T*)nullptr, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations host_strided_batch_vector hA(size_A, 1, size_A, 1); host_strided_batch_vector hARes(size_ARes, 1, size_ARes, 1); host_strided_batch_vector hIpiv(size_P, 1, size_P, 1); host_strided_batch_vector hInfo(1, 1, 1, 1); host_strided_batch_vector hInfoRes(1, 1, 1, 1); device_strided_batch_vector dA(size_A, 1, size_A, 1); device_strided_batch_vector dIpiv(size_P, 1, size_P, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_P) 
CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) orgbr_ungbr_getError(handle, side, m, n, k, dA, lda, dIpiv, dWork, size_W, dInfo, hA, hARes, hIpiv, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) orgbr_ungbr_getPerfData(handle, side, m, n, k, dA, lda, dIpiv, dWork, size_W, dInfo, hA, hIpiv, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); // validate results for rocsolver-test // using s * machine_precision as tolerance int s = row ? n : m; if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, s); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; rocsolver_bench_output("side", "m", "n", "k", "lda"); rocsolver_bench_output(sideC, m, n, k, lda); std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_orgqr_ungqr.hpp000066400000000000000000000401101436107207300240720ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void orgqr_ungqr_checkBadArgs(const hipsolverHandle_t handle, const int m, const int n, const int k, T dA, const int lda, T dIpiv, T dWork, const int lwork, U dInfo) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_orgqr_ungqr(FORTRAN, nullptr, m, n, k, dA, lda, dIpiv, dWork, lwork, dInfo), HIPSOLVER_STATUS_NOT_INITIALIZED); // values // N/A #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS( hipsolver_orgqr_ungqr( FORTRAN, handle, m, n, k, (T) nullptr, lda, dIpiv, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_orgqr_ungqr(FORTRAN, handle, m, n, k, dA, lda, (T) nullptr, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_orgqr_ungqr(FORTRAN, handle, m, n, k, dA, lda, dIpiv, dWork, lwork, (U) nullptr), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_orgqr_ungqr_bad_arg() { // safe arguments hipsolver_local_handle handle; int k = 1; int m = 1; int n = 1; int lda = 1; // memory allocation device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_orgqr_ungqr_bufferSize( FORTRAN, handle, m, n, k, dA.data(), lda, dIpiv.data(), &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments orgqr_ungqr_checkBadArgs( handle, m, n, k, dA.data(), lda, dIpiv.data(), dWork.data(), size_W, dInfo.data()); } template void orgqr_ungqr_initData(const hipsolverHandle_t handle, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Th& hA, Th& hIpiv, std::vector& hW, size_t size_W) { if(CPU) { int info; rocblas_init(hA, true); 
rocblas_init(hIpiv, true); // scale to avoid singularities for(int i = 0; i < m; ++i) { for(int j = 0; j < k; ++j) { if(i == j) hA[0][i + j * lda] += 400; else hA[0][i + j * lda] -= 4; } } // compute QR factorization cblas_geqrf(m, n, hA[0], lda, hIpiv[0], hW.data(), size_W, &info); } if(GPU) { // copy data from CPU to device CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dIpiv.transfer_from(hIpiv)); } } template void orgqr_ungqr_getError(const hipsolverHandle_t handle, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hARes, Th& hIpiv, Uh& hInfo, Uh& hInfoRes, double* max_err) { size_t size_W = size_t(n); std::vector hW(size_W); // initialize data orgqr_ungqr_initData(handle, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_orgqr_ungqr( FORTRAN, handle, m, n, k, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data())); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack cblas_orgqr_ungqr(m, n, k, hA[0], lda, hIpiv[0], hW.data(), size_W, hInfo[0]); // error is ||hA - hAr|| / ||hA|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. 
// IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm *max_err = norm_error('F', m, n, lda, hA[0], hARes[0]); // check info if(hInfo[0][0] != hInfoRes[0][0]) *max_err++; } template void orgqr_ungqr_getPerfData(const hipsolverHandle_t handle, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hIpiv, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { size_t size_W = size_t(n); std::vector hW(size_W); if(!perf) { orgqr_ungqr_initData( handle, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); cblas_orgqr_ungqr(m, n, k, hA[0], lda, hIpiv[0], hW.data(), size_W, hInfo[0]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } orgqr_ungqr_initData(handle, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // cold calls for(int iter = 0; iter < 2; iter++) { orgqr_ungqr_initData( handle, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); CHECK_ROCBLAS_ERROR(hipsolver_orgqr_ungqr(FORTRAN, handle, m, n, k, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data())); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { orgqr_ungqr_initData( handle, m, n, k, dA, lda, dIpiv, hA, hIpiv, hW, size_W); start = get_time_us_sync(stream); hipsolver_orgqr_ungqr(FORTRAN, handle, m, n, k, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data()); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_orgqr_ungqr(Arguments& argus) { // get arguments hipsolver_local_handle handle; int n = argus.get("n"); int m = argus.get("m", n); int k = argus.get("k", n); int lda = argus.get("lda", m); int hot_calls = argus.iters; // check non-supported values // N/A // determine sizes size_t size_A = size_t(lda) 
* n; size_t size_P = size_t(n); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; // check invalid sizes bool invalid_size = (m < 0 || n < 0 || k < 0 || lda < m || n > m || k > n); if(invalid_size) { EXPECT_ROCBLAS_STATUS(hipsolver_orgqr_ungqr(FORTRAN, handle, m, n, k, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, 0, (int*)nullptr), HIPSOLVER_STATUS_INVALID_VALUE); if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_orgqr_ungqr_bufferSize( FORTRAN, handle, m, n, k, (T*)nullptr, lda, (T*)nullptr, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations host_strided_batch_vector hA(size_A, 1, size_A, 1); host_strided_batch_vector hARes(size_ARes, 1, size_ARes, 1); host_strided_batch_vector hIpiv(size_P, 1, size_P, 1); host_strided_batch_vector hInfo(1, 1, 1, 1); host_strided_batch_vector hInfoRes(1, 1, 1, 1); device_strided_batch_vector dA(size_A, 1, size_A, 1); device_strided_batch_vector dIpiv(size_P, 1, size_P, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) orgqr_ungqr_getError(handle, m, n, k, dA, lda, dIpiv, dWork, size_W, dInfo, hA, hARes, hIpiv, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) orgqr_ungqr_getPerfData(handle, m, n, k, dA, lda, dIpiv, dWork, size_W, dInfo, hA, hIpiv, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); // validate results for rocsolver-test // using m * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, m); // output results for rocsolver-bench if(argus.timing) 
{ if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; rocsolver_bench_output("m", "n", "k", "lda"); rocsolver_bench_output(m, n, k, lda); std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_orgtr_ungtr.hpp000066400000000000000000000403711436107207300241110ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void orgtr_ungtr_checkBadArgs(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, T dA, const int lda, T dIpiv, T dWork, const int lwork, U dInfo) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_orgtr_ungtr(FORTRAN, nullptr, uplo, n, dA, lda, dIpiv, dWork, lwork, dInfo), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS( hipsolver_orgtr_ungtr( FORTRAN, handle, hipsolverFillMode_t(-1), n, dA, lda, dIpiv, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS( hipsolver_orgtr_ungtr( FORTRAN, handle, uplo, n, (T) nullptr, lda, dIpiv, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_orgtr_ungtr(FORTRAN, handle, uplo, n, dA, lda, (T) nullptr, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_orgtr_ungtr(FORTRAN, handle, uplo, n, dA, lda, dIpiv, dWork, lwork, (U) nullptr), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_orgtr_ungtr_bad_arg() { // safe arguments hipsolver_local_handle handle; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; int n = 1; int lda = 1; // memory allocation device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_orgtr_ungtr_bufferSize( FORTRAN, handle, uplo, n, dA.data(), lda, dIpiv.data(), &size_W); 
device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments orgtr_ungtr_checkBadArgs( handle, uplo, n, dA.data(), lda, dIpiv.data(), dWork.data(), size_W, dInfo.data()); } template void orgtr_ungtr_initData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, Td& dIpiv, Th& hA, Th& hIpiv, std::vector& hW, size_t size_W) { if(CPU) { using S = decltype(std::real(T{})); size_t s = max(hIpiv.n(), 2); std::vector E(s - 1); std::vector D(s); rocblas_init(hA, true); rocblas_init(hIpiv, true); // scale to avoid singularities for(int i = 0; i < n; ++i) { for(int j = 0; j < n; ++j) { if(i == j) hA[0][i + j * lda] += 400; else hA[0][i + j * lda] -= 4; } } // compute sytrd/hetrd cblas_sytrd_hetrd(uplo, n, hA[0], lda, D.data(), E.data(), hIpiv[0], hW.data(), size_W); } if(GPU) { // copy data from CPU to device CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dIpiv.transfer_from(hIpiv)); } } template void orgtr_ungtr_getError(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, Td& dIpiv, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hARes, Th& hIpiv, Uh& hInfo, Uh& hInfoRes, double* max_err) { size_t size_W = n * 32; std::vector hW(size_W); // initialize data orgtr_ungtr_initData(handle, uplo, n, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_orgtr_ungtr( FORTRAN, handle, uplo, n, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data())); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack cblas_orgtr_ungtr(uplo, n, hA[0], lda, hIpiv[0], hW.data(), size_W, hInfo[0]); // error is ||hA - hAr|| / ||hA|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. 
// IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm *max_err = norm_error('F', n, n, lda, hA[0], hARes[0]); // check info if(hInfo[0][0] != hInfoRes[0][0]) *max_err++; } template void orgtr_ungtr_getPerfData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, Td& dIpiv, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hIpiv, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { size_t size_W = n * 32; std::vector hW(size_W); if(!perf) { orgtr_ungtr_initData( handle, uplo, n, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); cblas_orgtr_ungtr(uplo, n, hA[0], lda, hIpiv[0], hW.data(), size_W, hInfo[0]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } orgtr_ungtr_initData(handle, uplo, n, dA, lda, dIpiv, hA, hIpiv, hW, size_W); // cold calls for(int iter = 0; iter < 2; iter++) { orgtr_ungtr_initData( handle, uplo, n, dA, lda, dIpiv, hA, hIpiv, hW, size_W); CHECK_ROCBLAS_ERROR(hipsolver_orgtr_ungtr(FORTRAN, handle, uplo, n, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data())); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { orgtr_ungtr_initData( handle, uplo, n, dA, lda, dIpiv, hA, hIpiv, hW, size_W); start = get_time_us_sync(stream); hipsolver_orgtr_ungtr(FORTRAN, handle, uplo, n, dA.data(), lda, dIpiv.data(), dWork.data(), lwork, dInfo.data()); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_orgtr_ungtr(Arguments& argus) { // get arguments hipsolver_local_handle handle; char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int hot_calls = argus.iters; // check non-supported values // N/A // 
determine sizes // size_P could be zero in test cases that are not quick-return or invalid // cases setting it to one to avoid possible memory access errors in the rest // of the unit test size_t size_A = size_t(lda) * n; size_t size_P = size_t(n); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n); if(invalid_size) { EXPECT_ROCBLAS_STATUS(hipsolver_orgtr_ungtr(FORTRAN, handle, uplo, n, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, 0, (int*)nullptr), HIPSOLVER_STATUS_INVALID_VALUE); if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_orgtr_ungtr_bufferSize( FORTRAN, handle, uplo, n, (T*)nullptr, lda, (T*)nullptr, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations host_strided_batch_vector hA(size_A, 1, size_A, 1); host_strided_batch_vector hARes(size_ARes, 1, size_ARes, 1); host_strided_batch_vector hIpiv(size_P, 1, size_P, 1); host_strided_batch_vector hInfo(1, 1, 1, 1); host_strided_batch_vector hInfoRes(1, 1, 1, 1); device_strided_batch_vector dA(size_A, 1, size_A, 1); device_strided_batch_vector dIpiv(size_P, 1, size_P, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) orgtr_ungtr_getError(handle, uplo, n, dA, lda, dIpiv, dWork, size_W, dInfo, hA, hARes, hIpiv, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) orgtr_ungtr_getPerfData(handle, uplo, n, dA, lda, dIpiv, dWork, size_W, dInfo, hA, hIpiv, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); // validate 
results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; rocsolver_bench_output("uplo", "n", "lda"); rocsolver_bench_output(uploC, n, lda); std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_ormqr_unmqr.hpp000066400000000000000000000734511436107207300241240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void ormqr_unmqr_checkBadArgs(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const hipsolverOperation_t trans, const int m, const int n, const int k, T dA, const int lda, T dIpiv, T dC, const int ldc, T dWork, const int lwork, U dInfo) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_ormqr_unmqr( FORTRAN, nullptr, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, hipsolverSideMode_t(-1), trans, m, n, k, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_ENUM); EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, hipsolverOperation_t(-1), m, n, k, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_ENUM); if(COMPLEX) EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, HIPSOLVER_OP_T, m, n, k, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); else EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, HIPSOLVER_OP_C, m, n, k, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, (T) nullptr, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), 
HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, dA, lda, (T) nullptr, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, dA, lda, dIpiv, (T) nullptr, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, dWork, lwork, (U) nullptr), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template > void testing_ormqr_unmqr_bad_arg() { // safe arguments hipsolver_local_handle handle; hipsolverSideMode_t side = HIPSOLVER_SIDE_LEFT; hipsolverOperation_t trans = HIPSOLVER_OP_N; int k = 1; int m = 1; int n = 1; int lda = 1; int ldc = 1; // memory allocation device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dC(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_ormqr_unmqr_bufferSize(FORTRAN, handle, side, trans, m, n, k, dA.data(), lda, dIpiv.data(), dC.data(), ldc, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments ormqr_unmqr_checkBadArgs(handle, side, trans, m, n, k, dA.data(), lda, dIpiv.data(), dC.data(), ldc, dWork.data(), size_W, dInfo.data()); } template void ormqr_unmqr_initData(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const hipsolverOperation_t trans, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Td& dC, const int ldc, Th& hA, Th& hIpiv, Th& hC, std::vector& hW, size_t size_W) { if(CPU) { int info; int nq = (side == HIPSOLVER_SIDE_LEFT) ? 
m : n; rocblas_init(hA, true); rocblas_init(hIpiv, true); rocblas_init(hC, true); // scale to avoid singularities for(int i = 0; i < nq; ++i) { for(int j = 0; j < k; ++j) { if(i == j) hA[0][i + j * lda] += 400; else hA[0][i + j * lda] -= 4; } } // compute QR factorization cblas_geqrf(nq, k, hA[0], lda, hIpiv[0], hW.data(), size_W, &info); } if(GPU) { // copy data from CPU to device CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dIpiv.transfer_from(hIpiv)); CHECK_HIP_ERROR(dC.transfer_from(hC)); } } template void ormqr_unmqr_getError(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const hipsolverOperation_t trans, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Td& dC, const int ldc, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hIpiv, Th& hC, Th& hCRes, Uh& hInfo, Uh& hInfoRes, double* max_err) { size_t size_W = max(max(m, n), k); std::vector hW(size_W); // initialize data ormqr_unmqr_initData( handle, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, dA.data(), lda, dIpiv.data(), dC.data(), ldc, dWork.data(), lwork, dInfo.data())); CHECK_HIP_ERROR(hCRes.transfer_from(dC)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack cblas_ormqr_unmqr( side, trans, m, n, k, hA[0], lda, hIpiv[0], hC[0], ldc, hW.data(), size_W, hInfo[0]); // error is ||hC - hCr|| / ||hC|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. 
// IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm *max_err = norm_error('F', m, n, ldc, hC[0], hCRes[0]); // check info if(hInfo[0][0] != hInfoRes[0][0]) *max_err++; } template void ormqr_unmqr_getPerfData(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const hipsolverOperation_t trans, const int m, const int n, const int k, Td& dA, const int lda, Td& dIpiv, Td& dC, const int ldc, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hIpiv, Th& hC, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { size_t size_W = max(max(m, n), k); std::vector hW(size_W); if(!perf) { ormqr_unmqr_initData( handle, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); cblas_ormqr_unmqr( side, trans, m, n, k, hA[0], lda, hIpiv[0], hC[0], ldc, hW.data(), size_W, hInfo[0]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } ormqr_unmqr_initData( handle, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); // cold calls for(int iter = 0; iter < 2; iter++) { ormqr_unmqr_initData( handle, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); CHECK_ROCBLAS_ERROR(hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, dA.data(), lda, dIpiv.data(), dC.data(), ldc, dWork.data(), lwork, dInfo.data())); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { ormqr_unmqr_initData( handle, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); start = get_time_us_sync(stream); hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, dA.data(), lda, dIpiv.data(), dC.data(), ldc, dWork.data(), lwork, dInfo.data()); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template > void 
testing_ormqr_unmqr(Arguments& argus) { // get arguments hipsolver_local_handle handle; char sideC = argus.get("side"); char transC = argus.get("trans"); int m, n, k; if(sideC == 'L') { m = argus.get("m"); n = argus.get("n", m); k = argus.get("k", m); } else { n = argus.get("n"); m = argus.get("m", n); k = argus.get("k", n); } int lda = argus.get("lda", sideC == 'L' ? m : n); int ldc = argus.get("ldc", m); hipsolverSideMode_t side = char2hipsolver_side(sideC); hipsolverOperation_t trans = char2hipsolver_operation(transC); int hot_calls = argus.iters; // check non-supported values bool invalid_value = ((COMPLEX && trans == HIPSOLVER_OP_T) || (!COMPLEX && trans == HIPSOLVER_OP_C)); if(invalid_value) { EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, ldc, (T*)nullptr, 0, (int*)nullptr), HIPSOLVER_STATUS_INVALID_VALUE); if(argus.timing) rocsolver_bench_inform(inform_invalid_args); return; } // determine sizes bool left = (side == HIPSOLVER_SIDE_LEFT); size_t size_A = size_t(lda) * k; size_t size_P = size_t(k); size_t size_C = size_t(ldc) * n; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_CRes = (argus.unit_check || argus.norm_check) ? 
size_C : 0; // check invalid sizes bool invalid_size = ((m < 0 || n < 0 || k < 0 || ldc < m) || (left && (lda < m || k > m)) || (!left && (lda < n || k > n))); if(invalid_size) { EXPECT_ROCBLAS_STATUS(hipsolver_ormqr_unmqr(FORTRAN, handle, side, trans, m, n, k, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, ldc, (T*)nullptr, 0, (int*)nullptr), HIPSOLVER_STATUS_INVALID_VALUE); if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_ormqr_unmqr_bufferSize(FORTRAN, handle, side, trans, m, n, k, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, ldc, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations host_strided_batch_vector hC(size_C, 1, size_C, 1); host_strided_batch_vector hCRes(size_CRes, 1, size_CRes, 1); host_strided_batch_vector hIpiv(size_P, 1, size_P, 1); host_strided_batch_vector hA(size_A, 1, size_A, 1); host_strided_batch_vector hInfo(1, 1, 1, 1); host_strided_batch_vector hInfoRes(1, 1, 1, 1); device_strided_batch_vector dC(size_C, 1, size_C, 1); device_strided_batch_vector dIpiv(size_P, 1, size_P, 1); device_strided_batch_vector dA(size_A, 1, size_A, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); if(size_C) CHECK_HIP_ERROR(dC.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) ormqr_unmqr_getError(handle, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, dWork, size_W, dInfo, hA, hIpiv, hC, hCRes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) ormqr_unmqr_getPerfData(handle, side, trans, m, n, k, dA, lda, dIpiv, dC, ldc, dWork, size_W, dInfo, hA, hIpiv, hC, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); // validate results for rocsolver-test 
// using s * machine_precision as tolerance int s = left ? m : n; if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, s); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; rocsolver_bench_output("side", "trans", "m", "n", "k", "lda", "ldc"); rocsolver_bench_output(sideC, transC, m, n, k, lda, ldc); std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_ormtr_unmtr.hpp000066400000000000000000000771171436107207300241350ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void ormtr_unmtr_checkBadArgs(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const hipsolverFillMode_t uplo, const hipsolverOperation_t trans, const int m, const int n, T dA, const int lda, T dIpiv, T dC, const int ldc, T dWork, const int lwork, U dInfo) { // handle EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, nullptr, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, hipsolverSideMode_t(-1), uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_ENUM); EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, hipsolverFillMode_t(-1), trans, m, n, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_ENUM); EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, hipsolverOperation_t(-1), m, n, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_ENUM); if(COMPLEX) EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, HIPSOLVER_OP_T, m, n, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); else EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, HIPSOLVER_OP_C, m, n, dA, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); #if 
defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, (T) nullptr, lda, dIpiv, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, dA, lda, (T) nullptr, dC, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, dA, lda, dIpiv, (T) nullptr, ldc, dWork, lwork, dInfo), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, dWork, lwork, (U) nullptr), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template > void testing_ormtr_unmtr_bad_arg() { // safe arguments hipsolver_local_handle handle; hipsolverSideMode_t side = HIPSOLVER_SIDE_LEFT; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; hipsolverOperation_t trans = HIPSOLVER_OP_N; int m = 1; int n = 1; int lda = 1; int ldc = 1; // memory allocation device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dC(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dC.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_ormtr_unmtr_bufferSize(FORTRAN, handle, side, uplo, trans, m, n, dA.data(), lda, dIpiv.data(), dC.data(), ldc, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments ormtr_unmtr_checkBadArgs(handle, side, uplo, trans, m, n, dA.data(), lda, dIpiv.data(), dC.data(), ldc, dWork.data(), size_W, dInfo.data()); } template void ormtr_unmtr_initData(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const hipsolverFillMode_t uplo, const hipsolverOperation_t trans, const int m, 
const int n, Td& dA, const int lda, Td& dIpiv, Td& dC, const int ldc, Th& hA, Th& hIpiv, Th& hC, std::vector& hW, size_t size_W) { if(CPU) { using S = decltype(std::real(T{})); int nq = (side == HIPSOLVER_SIDE_LEFT) ? m : n; std::vector E(nq - 1); std::vector D(nq); rocblas_init(hA, true); rocblas_init(hIpiv, true); rocblas_init(hC, true); // scale to avoid singularities for(int i = 0; i < nq; ++i) { for(int j = 0; j < nq; ++j) { if(i == j) hA[0][i + j * lda] += 400; else hA[0][i + j * lda] -= 4; } } // compute sytrd/hetrd cblas_sytrd_hetrd(uplo, nq, hA[0], lda, D.data(), E.data(), hIpiv[0], hW.data(), size_W); } if(GPU) { // copy data from CPU to device CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dIpiv.transfer_from(hIpiv)); CHECK_HIP_ERROR(dC.transfer_from(hC)); } } template void ormtr_unmtr_getError(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const hipsolverFillMode_t uplo, const hipsolverOperation_t trans, const int m, const int n, Td& dA, const int lda, Td& dIpiv, Td& dC, const int ldc, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hIpiv, Th& hC, Th& hCRes, Uh& hInfo, Uh& hInfoRes, double* max_err) { size_t size_W = (side == HIPSOLVER_SIDE_LEFT ? m : n) * 32; std::vector hW(size_W); // initialize data ormtr_unmtr_initData( handle, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, dA.data(), lda, dIpiv.data(), dC.data(), ldc, dWork.data(), lwork, dInfo.data())); CHECK_HIP_ERROR(hCRes.transfer_from(dC)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack cblas_ormtr_unmtr( side, uplo, trans, m, n, hA[0], lda, hIpiv[0], hC[0], ldc, hW.data(), size_W, hInfo[0]); // error is ||hC - hCr|| / ||hC|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. 
// IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm *max_err = norm_error('F', m, n, ldc, hC[0], hCRes[0]); // check info if(hInfo[0][0] != hInfoRes[0][0]) *max_err++; } template void ormtr_unmtr_getPerfData(const hipsolverHandle_t handle, const hipsolverSideMode_t side, const hipsolverFillMode_t uplo, const hipsolverOperation_t trans, const int m, const int n, Td& dA, const int lda, Td& dIpiv, Td& dC, const int ldc, Td& dWork, const int lwork, Ud& dInfo, Th& hA, Th& hIpiv, Th& hC, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { size_t size_W = (side == HIPSOLVER_SIDE_LEFT ? m : n) * 32; std::vector hW(size_W); if(!perf) { ormtr_unmtr_initData( handle, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); cblas_ormtr_unmtr( side, uplo, trans, m, n, hA[0], lda, hIpiv[0], hC[0], ldc, hW.data(), size_W, hInfo[0]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } ormtr_unmtr_initData( handle, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); // cold calls for(int iter = 0; iter < 2; iter++) { ormtr_unmtr_initData( handle, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); CHECK_ROCBLAS_ERROR(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, dA.data(), lda, dIpiv.data(), dC.data(), ldc, dWork.data(), lwork, dInfo.data())); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { ormtr_unmtr_initData( handle, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, hA, hIpiv, hC, hW, size_W); start = get_time_us_sync(stream); hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, dA.data(), lda, dIpiv.data(), dC.data(), ldc, dWork.data(), lwork, dInfo.data()); *gpu_time_used += get_time_us_sync(stream) - 
start; } *gpu_time_used /= hot_calls; } template > void testing_ormtr_unmtr(Arguments& argus) { // get arguments hipsolver_local_handle handle; char sideC = argus.get("side"); char uploC = argus.get("uplo"); char transC = argus.get("trans"); int m, n; if(sideC == 'L') { m = argus.get("m"); n = argus.get("n", m); } else { n = argus.get("n"); m = argus.get("m", n); } int nq = (sideC == 'L' ? m : n); int lda = argus.get("lda", nq); int ldc = argus.get("ldc", m); hipsolverSideMode_t side = char2hipsolver_side(sideC); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); hipsolverOperation_t trans = char2hipsolver_operation(transC); int hot_calls = argus.iters; // check non-supported values bool invalid_value = ((COMPLEX && trans == HIPSOLVER_OP_T) || (!COMPLEX && trans == HIPSOLVER_OP_C)); if(invalid_value) { EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, ldc, (T*)nullptr, 0, (int*)nullptr), HIPSOLVER_STATUS_INVALID_VALUE); if(argus.timing) rocsolver_bench_inform(inform_invalid_args); return; } // determine sizes bool left = (side == HIPSOLVER_SIDE_LEFT); size_t size_P = size_t(nq); size_t size_C = size_t(ldc) * n; size_t size_A = size_t(lda) * nq; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_CRes = (argus.unit_check || argus.norm_check) ? 
size_C : 0; // check invalid sizes bool invalid_size = (m < 0 || n < 0 || ldc < m || lda < nq); if(invalid_size) { EXPECT_ROCBLAS_STATUS(hipsolver_ormtr_unmtr(FORTRAN, handle, side, uplo, trans, m, n, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, ldc, (T*)nullptr, 0, (int*)nullptr), HIPSOLVER_STATUS_INVALID_VALUE); if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_ormtr_unmtr_bufferSize(FORTRAN, handle, side, uplo, trans, m, n, (T*)nullptr, lda, (T*)nullptr, (T*)nullptr, ldc, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations host_strided_batch_vector hC(size_C, 1, size_C, 1); host_strided_batch_vector hCRes(size_CRes, 1, size_CRes, 1); host_strided_batch_vector hIpiv(size_P, 1, size_P, 1); host_strided_batch_vector hA(size_A, 1, size_A, 1); host_strided_batch_vector hInfo(1, 1, 1, 1); host_strided_batch_vector hInfoRes(1, 1, 1, 1); device_strided_batch_vector dC(size_C, 1, size_C, 1); device_strided_batch_vector dIpiv(size_P, 1, size_P, 1); device_strided_batch_vector dA(size_A, 1, size_A, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); device_strided_batch_vector dWork(size_W, 1, size_W, 1); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); if(size_C) CHECK_HIP_ERROR(dC.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) ormtr_unmtr_getError(handle, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, dWork, size_W, dInfo, hA, hIpiv, hC, hCRes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) ormtr_unmtr_getPerfData(handle, side, uplo, trans, m, n, dA, lda, dIpiv, dC, ldc, dWork, size_W, dInfo, hA, hIpiv, hC, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); // validate results for rocsolver-test // using s * machine_precision as tolerance int s = 
left ? m : n; if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, s); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; rocsolver_bench_output("side", "uplo", "trans", "m", "n", "lda", "ldc"); rocsolver_bench_output(sideC, uploC, transC, m, n, lda, ldc); std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_potrf.hpp000066400000000000000000000474071436107207300226760ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void potrf_checkBadArgs(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, T dA, const int lda, const int stA, U dWork, const int lwork, V dinfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_potrf(API, nullptr, uplo, n, dA, lda, stA, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS( hipsolver_potrf( API, handle, hipsolverFillMode_t(-1), n, dA, lda, stA, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS( hipsolver_potrf(API, handle, uplo, n, (T) nullptr, lda, stA, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_potrf(API, handle, uplo, n, dA, lda, stA, dWork, lwork, (V) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_potrf_bad_arg() { // safe arguments hipsolver_local_handle handle; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; int n = 1; int lda = 1; int stA = 1; int bc = 1; if(BATCHED) { // memory allocations device_batch_vector dA(1, 1, 1); device_strided_batch_vector dinfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); int size_W; hipsolver_potrf_bufferSize(API, handle, uplo, n, dA.data(), lda, &size_W, bc); device_strided_batch_vector dWork(size_W, 1, size_W, 
bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments potrf_checkBadArgs( handle, uplo, n, dA.data(), lda, stA, dWork.data(), size_W, dinfo.data(), bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dinfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); int size_W; hipsolver_potrf_bufferSize(API, handle, uplo, n, dA.data(), lda, &size_W, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments potrf_checkBadArgs( handle, uplo, n, dA.data(), lda, stA, dWork.data(), size_W, dinfo.data(), bc); } } template void potrf_initData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Ud& dInfo, const int bc, Th& hA, Uh& hInfo) { if(CPU) { rocblas_init(hA, true); for(rocblas_int b = 0; b < bc; ++b) { // scale to ensure positive definiteness for(rocblas_int i = 0; i < n; i++) hA[b][i + i * lda] = hA[b][i + i * lda] * conj(hA[b][i + i * lda]) * 400; } } if(GPU) { // now copy data to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void potrf_getError(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Vd& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Th& hARes, Uh& hInfo, Uh& hInfoRes, double* max_err) { // input data initialization potrf_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_potrf( API, handle, uplo, n, dA.data(), lda, stA, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) cblas_potrf(uplo, n, hA[b], lda, hInfo[b]); // error is ||hA - hARes|| / ||hA|| (ideally ||LL' - Lres Lres'|| / ||LL'||) // (THIS DOES NOT ACCOUNT FOR NUMERICAL 
REPRODUCIBILITY ISSUES. // IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm double err; int nn; *max_err = 0; for(int b = 0; b < bc; ++b) { nn = hInfoRes[b][0] == 0 ? n : hInfoRes[b][0]; // (TODO: For now, the algorithm is modifying the whole input matrix even when // it is not positive definite. So we only check the principal nn-by-nn submatrix. // Once this is corrected, nn could be always equal to n.) if(uplo == HIPSOLVER_FILL_MODE_UPPER) err = norm_error_upperTr('F', nn, nn, lda, hA[b], hARes[b]); else err = norm_error_lowerTr('F', nn, nn, lda, hA[b], hARes[b]); *max_err = err > *max_err ? err : *max_err; } // also check info for non positive definite cases err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) err++; *max_err += err; } template void potrf_getPerfData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Vd& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { if(!perf) { potrf_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_potrf(uplo, n, hA[b], lda, hInfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } potrf_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); // cold calls for(int iter = 0; iter < 2; iter++) { potrf_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); CHECK_ROCBLAS_ERROR(hipsolver_potrf( API, handle, uplo, n, dA.data(), lda, stA, dWork.data(), lwork, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { potrf_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); start = get_time_us_sync(stream); hipsolver_potrf( API, handle, 
uplo, n, dA.data(), lda, stA, dWork.data(), lwork, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_potrf(Arguments& argus) { // get arguments hipsolver_local_handle handle; char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); int stA = argus.get("strideA", lda * n); int bc = argus.batch_count; hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int hot_calls = argus.iters; rocblas_stride stARes = (argus.unit_check || argus.norm_check) ? stA : 0; // check non-supported values if(uplo != HIPSOLVER_FILL_MODE_UPPER && uplo != HIPSOLVER_FILL_MODE_LOWER) { if(BATCHED) { EXPECT_ROCBLAS_STATUS(hipsolver_potrf(API, handle, uplo, n, (T**)nullptr, lda, stA, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS( hipsolver_potrf( API, handle, uplo, n, (T*)nullptr, lda, stA, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_args); return; } // determine sizes size_t size_A = size_t(lda) * n; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? 
size_A : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || bc < 0); if(invalid_size) { if(BATCHED) { EXPECT_ROCBLAS_STATUS(hipsolver_potrf(API, handle, uplo, n, (T**)nullptr, lda, stA, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS( hipsolver_potrf( API, handle, uplo, n, (T*)nullptr, lda, stA, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; if(BATCHED) hipsolver_potrf_bufferSize(API, handle, uplo, n, (T**)nullptr, lda, &size_W, bc); else hipsolver_potrf_bufferSize(API, handle, uplo, n, (T*)nullptr, lda, &size_W, bc); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // memory allocations host_batch_vector hA(size_A, 1, bc); host_batch_vector hARes(size_ARes, 1, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_batch_vector dA(size_A, 1, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) potrf_getError(handle, uplo, n, dA, lda, stA, dWork, size_W, dInfo, bc, hA, hARes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) potrf_getPerfData(handle, uplo, n, dA, lda, stA, dWork, size_W, dInfo, bc, hA, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); 
device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) potrf_getError(handle, uplo, n, dA, lda, stA, dWork, size_W, dInfo, bc, hA, hARes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) potrf_getPerfData(handle, uplo, n, dA, lda, stA, dWork, size_W, dInfo, bc, hA, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("uplo", "n", "lda", "batch_c"); rocsolver_bench_output(uploC, n, lda, bc); } else if(STRIDED) { rocsolver_bench_output("uplo", "n", "lda", "strideA", "batch_c"); rocsolver_bench_output(uploC, n, lda, stA, bc); } else { rocsolver_bench_output("uplo", "n", "lda"); rocsolver_bench_output(uploC, n, lda); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } } hipSOLVER-rocm-5.5.1/clients/include/testing_potri.hpp000066400000000000000000000515631436107207300226770ustar00rootroot00000000000000/* 
************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void potri_checkBadArgs(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, T dA, const int lda, const int stA, U dWork, const int lwork, V dinfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_potri(FORTRAN, nullptr, uplo, n, dA, lda, stA, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS( hipsolver_potri( FORTRAN, handle, hipsolverFillMode_t(-1), n, dA, lda, stA, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS( hipsolver_potri(FORTRAN, handle, uplo, n, (T) nullptr, lda, stA, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_potri(FORTRAN, handle, uplo, n, dA, lda, stA, dWork, lwork, (V) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_potri_bad_arg() { // safe arguments hipsolver_local_handle handle; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; int n = 1; int lda = 1; int stA = 1; int bc = 1; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dinfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dinfo.memcheck()); // int size_W; // hipsolver_potri_bufferSize(FORTRAN, handle, uplo, n, dA.data(), lda, &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // potri_checkBadArgs( // handle, uplo, n, dA.data(), lda, stA, dWork.data(), size_W, dinfo.data(), bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dinfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); int size_W; hipsolver_potri_bufferSize(FORTRAN, handle, uplo, 
n, dA.data(), lda, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments potri_checkBadArgs( handle, uplo, n, dA.data(), lda, stA, dWork.data(), size_W, dinfo.data(), bc); } } template void potri_initData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Ud& dInfo, const int bc, Th& hA, Uh& hInfo) { if(CPU) { rocblas_init(hA, true); for(rocblas_int b = 0; b < bc; ++b) { // scale to ensure positive definiteness for(rocblas_int i = 0; i < n; i++) hA[b][i + i * lda] = hA[b][i + i * lda] * conj(hA[b][i + i * lda]) * 400; // do the Cholesky factorization of matrix A w/ the reference LAPACK routine cblas_potrf(uplo, n, hA[b], lda, hInfo[b]); } } if(GPU) { // now copy data to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void potri_getError(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Vd& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Th& hARes, Uh& hInfo, Uh& hInfoRes, double* max_err) { // input data initialization potri_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_potri( FORTRAN, handle, uplo, n, dA.data(), lda, stA, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) cblas_potri(uplo, n, hA[b], lda, hInfo[b]); // check info for singularities double err = 0; *max_err = 0; for(rocblas_int b = 0; b < bc; ++b) { if(hInfo[b][0] != hInfoRes[b][0]) err++; } *max_err += err; // error is ||hA - hARes|| / ||hA|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. 
// IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm for(rocblas_int b = 0; b < bc; ++b) { if(hInfoRes[b][0] == 0) { if(uplo == HIPSOLVER_FILL_MODE_UPPER) err = norm_error_upperTr('F', n, n, lda, hA[b], hARes[b]); else err = norm_error_lowerTr('F', n, n, lda, hA[b], hARes[b]); *max_err = err > *max_err ? err : *max_err; } } } template void potri_getPerfData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Vd& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { if(!perf) { potri_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_potri(uplo, n, hA[b], lda, hInfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } potri_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); // cold calls for(int iter = 0; iter < 2; iter++) { potri_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); CHECK_ROCBLAS_ERROR(hipsolver_potri( FORTRAN, handle, uplo, n, dA.data(), lda, stA, dWork.data(), lwork, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { potri_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo); start = get_time_us_sync(stream); hipsolver_potri( FORTRAN, handle, uplo, n, dA.data(), lda, stA, dWork.data(), lwork, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_potri(Arguments& argus) { // get arguments hipsolver_local_handle handle; char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); int stA = argus.get("strideA", lda * n); int bc = argus.batch_count; hipsolverFillMode_t uplo 
= char2hipsolver_fill(uploC); int hot_calls = argus.iters; rocblas_stride stARes = (argus.unit_check || argus.norm_check) ? stA : 0; // check non-supported values if(uplo != HIPSOLVER_FILL_MODE_UPPER && uplo != HIPSOLVER_FILL_MODE_LOWER) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_potri(FORTRAN, // handle, // uplo, // n, // (T**)nullptr, // lda, // stA, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_potri(FORTRAN, handle, uplo, n, (T*)nullptr, lda, stA, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_args); return; } // determine sizes size_t size_A = size_t(lda) * n; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_potri(FORTRAN, // handle, // uplo, // n, // (T**)nullptr, // lda, // stA, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_potri(FORTRAN, handle, uplo, n, (T*)nullptr, lda, stA, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_potri_bufferSize(FORTRAN, handle, uplo, n, (T*)nullptr, lda, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hARes(size_ARes, 1, bc); // host_strided_batch_vector hInfo(1, 1, 1, bc); // host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_strided_batch_vector dInfo(1, 1, 1, bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // 
if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // potri_getError(handle, // uplo, // n, // dA, // lda, // stA, // dWork, // size_W, // dInfo, // bc, // hA, // hARes, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // potri_getPerfData(handle, // uplo, // n, // dA, // lda, // stA, // dWork, // size_W, // dInfo, // bc, // hA, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) potri_getError(handle, uplo, n, dA, lda, stA, dWork, size_W, dInfo, bc, hA, hARes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) potri_getPerfData(handle, uplo, n, dA, lda, stA, dWork, size_W, dInfo, bc, hA, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("uplo", "n", "lda", "batch_c"); rocsolver_bench_output(uploC, n, lda, bc); } else if(STRIDED) { 
rocsolver_bench_output("uplo", "n", "lda", "strideA", "batch_c"); rocsolver_bench_output(uploC, n, lda, stA, bc); } else { rocsolver_bench_output("uplo", "n", "lda"); rocsolver_bench_output(uploC, n, lda); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } } hipSOLVER-rocm-5.5.1/clients/include/testing_potrs.hpp000066400000000000000000000713561436107207300227130ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void potrs_checkBadArgs(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, const int nrhs, T dA, const int lda, const int stA, T dB, const int ldb, const int stB, V dWork, const int lwork, U dInfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_potrs( API, nullptr, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_potrs(API, handle, hipsolverFillMode_t(-1), n, nrhs, dA, lda, stA, dB, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS(hipsolver_potrs(API, handle, uplo, n, nrhs, (T) nullptr, lda, stA, dB, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_potrs(API, handle, uplo, n, nrhs, dA, lda, stA, (T) nullptr, ldb, stB, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_potrs( API, handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, dWork, lwork, (U) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_potrs_bad_arg() { // safe arguments hipsolver_local_handle handle; int n = 1; int nrhs = 1; int lda = 1; int ldb = 1; int stA = 1; int stB = 1; int bc = 1; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; if(BATCHED) { // memory allocations device_batch_vector dA(1, 1, 1); device_batch_vector dB(1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); 
CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_potrs_bufferSize( API, handle, uplo, n, nrhs, dA.data(), lda, dB.data(), ldb, &size_W, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments potrs_checkBadArgs(handle, uplo, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dWork.data(), size_W, dInfo.data(), bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dB(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_potrs_bufferSize( API, handle, uplo, n, nrhs, dA.data(), lda, dB.data(), ldb, &size_W, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments potrs_checkBadArgs(handle, uplo, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dWork.data(), size_W, dInfo.data(), bc); } } template void potrs_initData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, const int nrhs, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, const int bc, Th& hA, Th& hB) { if(CPU) { rocblas_init(hA, true); rocblas_init(hB, true); int info; for(int b = 0; b < bc; ++b) { // scale to ensure positive definiteness for(int i = 0; i < n; i++) hA[b][i + i * lda] = hA[b][i + i * lda] * conj(hA[b][i + i * lda]) * 400; // do the Cholesky factorization of matrix A w/ the reference LAPACK routine cblas_potrf(uplo, n, hA[b], lda, &info); } } if(GPU) { // now copy matrices to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); } } template void potrs_getError(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, const int nrhs, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, Vd& dWork, const 
int lwork, Ud& dInfo, const int bc, Th& hA, Th& hB, Th& hBRes, Uh& hInfo, Uh& hInfoRes, double* max_err) { // input data initialization potrs_initData(handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, bc, hA, hB); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_potrs(API, handle, uplo, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hBRes.transfer_from(dB)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) { cblas_potrs(uplo, n, nrhs, hA[b], lda, hB[b], ldb, hInfo[b]); } // error is ||hB - hBRes|| / ||hB|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. // IT MIGHT BE REVISITED IN THE FUTURE) // using vector-induced infinity norm double err; *max_err = 0; for(int b = 0; b < bc; ++b) { err = norm_error('I', n, nrhs, ldb, hB[b], hBRes[b]); *max_err = err > *max_err ? err : *max_err; } // check info err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) err++; *max_err += err; } template void potrs_getPerfData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, const int nrhs, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, Vd& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Th& hB, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { if(!perf) { potrs_initData( handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, bc, hA, hB); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) { cblas_potrs(uplo, n, nrhs, hA[b], lda, hB[b], ldb, hInfo[b]); } *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } potrs_initData(handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, bc, hA, hB); // cold calls for(int iter = 0; iter < 2; iter++) { potrs_initData( handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, bc, hA, hB); CHECK_ROCBLAS_ERROR(hipsolver_potrs(API, 
handle, uplo, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dWork.data(), lwork, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { potrs_initData( handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, bc, hA, hB); start = get_time_us_sync(stream); hipsolver_potrs(API, handle, uplo, n, nrhs, dA.data(), lda, stA, dB.data(), ldb, stB, dWork.data(), lwork, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_potrs(Arguments& argus) { // get arguments hipsolver_local_handle handle; char uploC = argus.get("uplo"); int n = argus.get("n"); int nrhs = argus.get("nrhs", n); int lda = argus.get("lda", n); int ldb = argus.get("ldb", n); int stA = argus.get("strideA", lda * n); int stB = argus.get("strideB", ldb * nrhs); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; int stBRes = (argus.unit_check || argus.norm_check) ? stB : 0; // check non-supported values if(uplo != HIPSOLVER_FILL_MODE_UPPER && uplo != HIPSOLVER_FILL_MODE_LOWER) { if(BATCHED) { EXPECT_ROCBLAS_STATUS(hipsolver_potrs(API, handle, uplo, n, nrhs, (T**)nullptr, lda, stA, (T**)nullptr, ldb, stB, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_potrs(API, handle, uplo, n, nrhs, (T*)nullptr, lda, stA, (T*)nullptr, ldb, stB, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_args); return; } // determine sizes size_t size_A = size_t(lda) * n; size_t size_B = size_t(ldb) * nrhs; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_BRes = (argus.unit_check || argus.norm_check) ? 
size_B : 0; // check invalid sizes bool invalid_size = (n < 0 || nrhs < 0 || lda < n || ldb < n || bc < 0); if(invalid_size) { if(BATCHED) { EXPECT_ROCBLAS_STATUS(hipsolver_potrs(API, handle, uplo, n, nrhs, (T**)nullptr, lda, stA, (T**)nullptr, ldb, stB, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_potrs(API, handle, uplo, n, nrhs, (T*)nullptr, lda, stA, (T*)nullptr, ldb, stB, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; if(BATCHED) hipsolver_potrs_bufferSize( API, handle, uplo, n, nrhs, (T**)nullptr, lda, (T**)nullptr, ldb, &size_W, bc); else hipsolver_potrs_bufferSize( API, handle, uplo, n, nrhs, (T*)nullptr, lda, (T*)nullptr, ldb, &size_W, bc); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // memory allocations host_batch_vector hA(size_A, 1, bc); host_batch_vector hB(size_B, 1, bc); host_batch_vector hBRes(size_BRes, 1, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_batch_vector dA(size_A, 1, bc); device_batch_vector dB(size_B, 1, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_B) CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) potrs_getError(handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, dWork, size_W, dInfo, bc, hA, hB, hBRes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) potrs_getPerfData(handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, dWork, size_W, dInfo, bc, hA, hB, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } else { // memory allocations 
host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hB(size_B, 1, stB, bc); host_strided_batch_vector hBRes(size_BRes, 1, stBRes, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dB(size_B, 1, stB, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_B) CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) potrs_getError(handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, dWork, size_W, dInfo, bc, hA, hB, hBRes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) potrs_getPerfData(handle, uplo, n, nrhs, dA, lda, stA, dB, ldb, stB, dWork, size_W, dInfo, bc, hA, hB, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using m * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("uplo", "n", "nrhs", "lda", "ldb", "batch_c"); rocsolver_bench_output(uploC, n, nrhs, lda, ldb, bc); } else if(STRIDED) { rocsolver_bench_output( "uplo", "n", "nrhs", "lda", "ldb", "strideA", "strideB", "batch_c"); rocsolver_bench_output(uploC, n, nrhs, lda, ldb, stA, stB, bc); } else { rocsolver_bench_output("uplo", "n", "nrhs", "lda", "ldb"); rocsolver_bench_output(uploC, n, nrhs, lda, ldb); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << 
"============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_syevd_heevd.hpp000066400000000000000000001006031436107207300240350ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void syevd_heevd_checkBadArgs(const hipsolverHandle_t handle, const hipsolverEigMode_t evect, const hipsolverFillMode_t uplo, const int n, T dA, const int lda, const int stA, S dD, const int stD, T dWork, const int lwork, U dinfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_syevd_heevd( FORTRAN, nullptr, evect, uplo, n, dA, lda, stA, dD, stD, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_syevd_heevd(FORTRAN, handle, hipsolverEigMode_t(-1), uplo, n, dA, lda, stA, dD, stD, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); EXPECT_ROCBLAS_STATUS(hipsolver_syevd_heevd(FORTRAN, handle, evect, hipsolverFillMode_t(-1), n, dA, lda, stA, dD, stD, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS(hipsolver_syevd_heevd(FORTRAN, handle, evect, uplo, n, (T) nullptr, lda, stA, dD, stD, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_syevd_heevd(FORTRAN, handle, evect, uplo, n, dA, lda, stA, (S) nullptr, stD, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_syevd_heevd( FORTRAN, handle, evect, uplo, n, dA, lda, stA, dD, stD, dWork, lwork, (U) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_syevd_heevd_bad_arg() { using S = decltype(std::real(T{})); // safe arguments hipsolver_local_handle handle; hipsolverEigMode_t evect = HIPSOLVER_EIG_MODE_NOVECTOR; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_LOWER; int n = 1; int lda = 1; int stA = 1; int stD = 1; int bc = 1; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dD(1, 1, 1, 1); // device_strided_batch_vector dinfo(1, 1, 1, 1); // 
CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dD.memcheck()); // CHECK_HIP_ERROR(dinfo.memcheck()); // int size_W; // hipsolver_syevd_heevd_bufferSize( // FORTRAN, handle, evect, uplo, n, dA.data(), lda, dD.data(), &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // syevd_heevd_checkBadArgs(handle, // evect, // uplo, // n, // dA.data(), // lda, // stA, // dD.data(), // stD, // dWork.data(), // size_W, // dinfo.data(), // bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dD(1, 1, 1, 1); device_strided_batch_vector dinfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dD.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); int size_W; hipsolver_syevd_heevd_bufferSize( FORTRAN, handle, evect, uplo, n, dA.data(), lda, dD.data(), &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments syevd_heevd_checkBadArgs(handle, evect, uplo, n, dA.data(), lda, stA, dD.data(), stD, dWork.data(), size_W, dinfo.data(), bc); } } template void syevd_heevd_initData(const hipsolverHandle_t handle, const hipsolverEigMode_t evect, const int n, Td& dA, const int lda, const int bc, Th& hA, std::vector& A, bool test = true) { if(CPU) { rocblas_init(hA, true); // scale A to avoid singularities for(int b = 0; b < bc; ++b) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } // make copy of original data to test vectors if required if(test && evect == HIPSOLVER_EIG_MODE_VECTOR) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) A[b * lda * n + i + j * lda] = hA[b][i + j * lda]; } } } } if(GPU) { // now copy to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void syevd_heevd_getError(const hipsolverHandle_t handle, const hipsolverEigMode_t evect, const 
hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Sd& dD, const int stD, Td& dWork, const int lwork, Id& dinfo, const int bc, Th& hA, Th& hAres, Sh& hD, Sh& hDres, Ih& hinfo, Ih& hinfoRes, double* max_err) { constexpr bool COMPLEX = is_complex; using S = decltype(std::real(T{})); int sizeE, ltwork; if(!COMPLEX) { sizeE = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 2 * n + 1 : 1 + 6 * n + 2 * n * n); ltwork = 0; } else { sizeE = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n : 1 + 5 * n + 2 * n * n); ltwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n + 1 : 2 * n + n * n); } int liwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 1 : 3 + 5 * n); std::vector work(ltwork); std::vector hE(sizeE); std::vector iwork(liwork); std::vector A(lda * n * bc); // input data initialization syevd_heevd_initData(handle, evect, n, dA, lda, bc, hA, A); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_syevd_heevd(FORTRAN, handle, evect, uplo, n, dA.data(), lda, stA, dD.data(), stD, dWork.data(), lwork, dinfo.data(), bc)); CHECK_HIP_ERROR(hDres.transfer_from(dD)); CHECK_HIP_ERROR(hinfoRes.transfer_from(dinfo)); if(evect == HIPSOLVER_EIG_MODE_VECTOR) CHECK_HIP_ERROR(hAres.transfer_from(dA)); // CPU lapack for(int b = 0; b < bc; ++b) cblas_syevd_heevd(evect, uplo, n, hA[b], lda, hD[b], work.data(), ltwork, hE.data(), sizeE, iwork.data(), liwork, hinfo[b]); // Check info for non-convergence *max_err = 0; for(int b = 0; b < bc; ++b) if(hinfo[b][0] != hinfoRes[b][0]) *max_err += 1; // (We expect the used input matrices to always converge. Testing // implicitly the equivalent non-converged matrix is very complicated and it boils // down to essentially run the algorithm again and until convergence is achieved). 
double err = 0; for(int b = 0; b < bc; ++b) { if(evect != HIPSOLVER_EIG_MODE_VECTOR) { // only eigenvalues needed; can compare with LAPACK // error is ||hD - hDRes|| / ||hD|| // using frobenius norm if(hinfo[b][0] == 0) err = norm_error('F', 1, n, 1, hD[b], hDres[b]); *max_err = err > *max_err ? err : *max_err; } else { // both eigenvalues and eigenvectors needed; need to implicitly test // eigenvectors due to non-uniqueness of eigenvectors under scaling if(hinfo[b][0] == 0) { // multiply A with each of the n eigenvectors and divide by corresponding // eigenvalues T alpha; T beta = 0; for(int j = 0; j < n; j++) { alpha = T(1) / hDres[b][j]; cblas_symv_hemv(uplo, n, alpha, A.data() + b * lda * n, lda, hAres[b] + j * lda, 1, beta, hA[b] + j * lda, 1); } // error is ||hA - hARes|| / ||hA|| // using frobenius norm err = norm_error('F', n, n, lda, hA[b], hAres[b]); *max_err = err > *max_err ? err : *max_err; } } } } template void syevd_heevd_getPerfData(const hipsolverHandle_t handle, const hipsolverEigMode_t evect, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Sd& dD, const int stD, Td& dWork, const int lwork, Id& dinfo, const int bc, Th& hA, Sh& hD, Ih& hinfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { constexpr bool COMPLEX = is_complex; using S = decltype(std::real(T{})); int sizeE, ltwork; if(!COMPLEX) { sizeE = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 2 * n + 1 : 1 + 6 * n + 2 * n * n); ltwork = 0; } else { sizeE = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n : 1 + 5 * n + 2 * n * n); ltwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n + 1 : 2 * n + n * n); } int liwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 
1 : 3 + 5 * n); std::vector work(ltwork); std::vector hE(sizeE); std::vector iwork(liwork); std::vector A; if(!perf) { syevd_heevd_initData(handle, evect, n, dA, lda, bc, hA, A, 0); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_syevd_heevd(evect, uplo, n, hA[b], lda, hD[b], work.data(), ltwork, hE.data(), sizeE, iwork.data(), liwork, hinfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } syevd_heevd_initData(handle, evect, n, dA, lda, bc, hA, A, 0); // cold calls for(int iter = 0; iter < 2; iter++) { syevd_heevd_initData(handle, evect, n, dA, lda, bc, hA, A, 0); CHECK_ROCBLAS_ERROR(hipsolver_syevd_heevd(FORTRAN, handle, evect, uplo, n, dA.data(), lda, stA, dD.data(), stD, dWork.data(), lwork, dinfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { syevd_heevd_initData(handle, evect, n, dA, lda, bc, hA, A, 0); start = get_time_us_sync(stream); hipsolver_syevd_heevd(FORTRAN, handle, evect, uplo, n, dA.data(), lda, stA, dD.data(), stD, dWork.data(), lwork, dinfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_syevd_heevd(Arguments& argus) { using S = decltype(std::real(T{})); // get arguments hipsolver_local_handle handle; char evectC = argus.get("jobz"); char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); int stA = argus.get("strideA", lda * n); int stD = argus.get("strideD", n); hipsolverEigMode_t evect = char2hipsolver_evect(evectC); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; // determine sizes size_t size_A = size_t(lda) * n; size_t size_D = n; size_t size_Ares = (argus.unit_check || argus.norm_check) ? 
size_A : 0; size_t size_Dres = (argus.unit_check || argus.norm_check) ? size_D : 0; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_syevd_heevd(FORTRAN, // handle, // evect, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (S*)nullptr, // stD, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_syevd_heevd(FORTRAN, handle, evect, uplo, n, (T*)nullptr, lda, stA, (S*)nullptr, stD, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_syevd_heevd_bufferSize( FORTRAN, handle, evect, uplo, n, (T*)nullptr, lda, (S*)nullptr, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations (all cases) // host host_strided_batch_vector hD(size_D, 1, stD, bc); host_strided_batch_vector hinfo(1, 1, 1, bc); host_strided_batch_vector hinfoRes(1, 1, 1, bc); host_strided_batch_vector hDres(size_Dres, 1, stD, bc); // device device_strided_batch_vector dD(size_D, 1, stD, bc); device_strided_batch_vector dinfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_D) CHECK_HIP_ERROR(dD.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hAres(size_Ares, 1, bc); // device_batch_vector dA(size_A, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // { // syevd_heevd_getError(handle, // evect, // uplo, // n, // dA, // lda, // stA, // dD, // stD, // dWork, // size_W, // dinfo, // bc, // hA, // hAres, // hD, // hDres, // hinfo, // 
hinfoRes, // &max_error); // } // // collect performance data // if(argus.timing) // { // syevd_heevd_getPerfData(handle, // evect, // uplo, // n, // dA, // lda, // stA, // dD, // stD, // dWork, // size_W, // dinfo, // bc, // hA, // hD, // hinfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); // } } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hAres(size_Ares, 1, stA, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); // check computations if(argus.unit_check || argus.norm_check) { syevd_heevd_getError(handle, evect, uplo, n, dA, lda, stA, dD, stD, dWork, size_W, dinfo, bc, hA, hAres, hD, hDres, hinfo, hinfoRes, &max_error); } // collect performance data if(argus.timing) { syevd_heevd_getPerfData(handle, evect, uplo, n, dA, lda, stA, dD, stD, dWork, size_W, dinfo, bc, hA, hD, hinfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } } // validate results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("jobz", "uplo", "n", "lda", "strideD", "batch_c"); rocsolver_bench_output(evectC, uploC, n, lda, stD, bc); } else if(STRIDED) { rocsolver_bench_output("jobz", "uplo", "n", "lda", "strideA", "strideD", "batch_c"); rocsolver_bench_output(evectC, uploC, n, lda, stA, stD, bc); } else { rocsolver_bench_output("jobz", "uplo", "n", "lda"); rocsolver_bench_output(evectC, uploC, n, lda); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", 
"gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_syevdx_heevdx.hpp000066400000000000000000001374021436107207300244240ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void syevdx_heevdx_checkBadArgs(const hipsolverHandle_t handle, const hipsolverEigMode_t evect, const hipsolverEigRange_t erange, const hipsolverFillMode_t uplo, const int n, T dA, const int lda, const int stA, const SS vl, const SS vu, const int il, const int iu, U hNev, S dW, const int stW, T dWork, const int lwork, U dinfo, const int bc) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // handle EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, nullptr, evect, erange, uplo, n, dA, lda, stA, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, handle, hipsolverEigMode_t(-1), erange, uplo, n, dA, lda, stA, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, handle, evect, hipsolverEigRange_t(-1), uplo, n, dA, lda, stA, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, handle, evect, erange, hipsolverFillMode_t(-1), n, dA, lda, stA, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); // pointers EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, handle, evect, erange, uplo, n, (T) nullptr, lda, stA, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, il, iu, (U) nullptr, dW, stW, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, il, iu, hNev, (S) nullptr, stW, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); 
EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, (U) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_syevdx_heevdx_bad_arg() { using S = decltype(std::real(T{})); // safe arguments hipsolver_local_handle handle; hipsolverEigMode_t evect = HIPSOLVER_EIG_MODE_VECTOR; hipsolverEigRange_t erange = HIPSOLVER_EIG_RANGE_V; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_LOWER; int n = 1; int lda = 1; int stA = 1; int stW = 1; int bc = 1; S vl = 0.0; S vu = 1.0; int il = 0; int iu = 0; if(BATCHED) { // // memory allocations // host_strided_batch_vector hNev(1, 1, 1, 1); // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dW(1, 1, 1, 1); // device_strided_batch_vector dinfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dW.memcheck()); // CHECK_HIP_ERROR(dinfo.memcheck()); // int size_W; // hipsolver_syevdx_heevdx_bufferSize(API, // handle, // evect, // erange, // uplo, // n, // dA.data(), // lda, // vl, // vu, // il, // iu, // hNev.data(), // dW.data(), // &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // syevdx_heevdx_checkBadArgs(handle, // evect, // erange, // uplo, // n, // dA.data(), // lda, // stA, // vl, // vu, // il, // iu, // hNev.data(), // dW.data(), // stW, // dWork.data(), // size_W, // dinfo.data(), // bc); } else { // memory allocations host_strided_batch_vector hNev(1, 1, 1, 1); device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dW(1, 1, 1, 1); device_strided_batch_vector dinfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dW.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); int size_W; hipsolver_syevdx_heevdx_bufferSize(API, handle, evect, erange, uplo, n, dA.data(), lda, vl, vu, il, iu, hNev.data(), dW.data(), &size_W); device_strided_batch_vector dWork(size_W, 1, 
size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments syevdx_heevdx_checkBadArgs(handle, evect, erange, uplo, n, dA.data(), lda, stA, vl, vu, il, iu, hNev.data(), dW.data(), stW, dWork.data(), size_W, dinfo.data(), bc); } } template void syevdx_heevdx_initData(const hipsolverHandle_t handle, const hipsolverEigMode_t evect, const int n, Td& dA, const int lda, const int bc, Th& hA, std::vector& A, bool test = true) { if(CPU) { rocblas_init(hA, true); // scale A to avoid singularities for(int b = 0; b < bc; ++b) { for(int i = 0; i < n; i++) { for(int j = i; j < n; j++) { if(i == j) hA[b][i + j * lda] = std::real(hA[b][i + j * lda]) + 10; else { if(j == i + 1) { hA[b][i + j * lda] = (hA[b][i + j * lda] - 5) / 10; hA[b][j + i * lda] = conj(hA[b][i + j * lda]); } else hA[b][j + i * lda] = hA[b][i + j * lda] = 0; } } if(i == n / 4 || i == n / 2 || i == n - 1 || i == n / 7 || i == n / 5 || i == n / 3) hA[b][i + i * lda] *= -1; } // make copy of original data to test vectors if required if(test && evect == HIPSOLVER_EIG_MODE_VECTOR) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) A[b * lda * n + i + j * lda] = hA[b][i + j * lda]; } } } } if(GPU) { // now copy to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void syevdx_heevdx_getError(const hipsolverHandle_t handle, const hipsolverEigMode_t evect, const hipsolverEigRange_t erange, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, const S vl, const S vu, const int il, const int iu, Ih& hNevRes, Sd& dW, const int stW, Td& dWork, const int lwork, Id& dinfo, const int bc, Th& hA, Th& hARes, Ih& hNev, Sh& hW, Sh& hWRes, Ih& hinfo, Ih& hinfoRes, double* max_err) { constexpr bool COMPLEX = is_complex; int size_work = !COMPLEX ? 35 * n : 33 * n; int size_rwork = !COMPLEX ? 
0 : 7 * n; int size_iwork = 5 * n; std::vector work(size_work); std::vector rwork(size_rwork); std::vector iwork(size_iwork); std::vector A(lda * n * bc); std::vector Z(lda * n); std::vector ifail(n); S abstol = 2 * get_safemin(); // input data initialization syevdx_heevdx_initData(handle, evect, n, dA, lda, bc, hA, A); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_syevdx_heevdx(API, handle, evect, erange, uplo, n, dA.data(), lda, stA, vl, vu, il, iu, hNevRes.data(), dW.data(), stW, dWork.data(), lwork, dinfo.data(), bc)); CHECK_HIP_ERROR(hWRes.transfer_from(dW)); CHECK_HIP_ERROR(hinfoRes.transfer_from(dinfo)); if(evect == HIPSOLVER_EIG_MODE_VECTOR) CHECK_HIP_ERROR(hARes.transfer_from(dA)); // CPU lapack for(int b = 0; b < bc; ++b) cblas_syevx_heevx(evect, erange, uplo, n, hA[b], lda, vl, vu, il, iu, abstol, hNev[b], hW[b], Z.data(), lda, work.data(), size_work, rwork.data(), iwork.data(), ifail.data(), hinfo[b]); // Check info for non-convergence *max_err = 0; for(int b = 0; b < bc; ++b) if(hinfo[b][0] != hinfoRes[b][0]) *max_err += 1; // Check number of returned eigenvalues *max_err = 0; for(int b = 0; b < bc; ++b) if(hNev[b][0] != hNevRes[b][0]) *max_err += 1; // (We expect the used input matrices to always converge. Testing // implicitly the equivalent non-converged matrix is very complicated and it boils // down to essentially run the algorithm again and until convergence is achieved). double err = 0; for(int b = 0; b < bc; ++b) { if(evect != HIPSOLVER_EIG_MODE_VECTOR) { // only eigenvalues needed; can compare with LAPACK // error is ||hW - hWRes|| / ||hW|| // using frobenius norm if(hinfo[b][0] == 0) err = norm_error('F', 1, hNev[b][0], 1, hW[b], hWRes[b]); *max_err = err > *max_err ? 
err : *max_err; } else { // both eigenvalues and eigenvectors needed; need to implicitly test // eigenvectors due to non-uniqueness of eigenvectors under scaling if(hinfo[b][0] == 0) { // multiply A with each of the m eigenvectors and divide by corresponding // eigenvalues T alpha; T beta = 0; for(int j = 0; j < hNev[b][0]; j++) { alpha = T(1) / hWRes[b][j]; cblas_symv_hemv(uplo, n, alpha, A.data() + b * lda * n, lda, hARes[b] + j * lda, 1, beta, hA[b] + j * lda, 1); } // error is ||hA - hARes|| / ||hA|| // using frobenius norm err = norm_error('F', n, hNev[b][0], lda, hA[b], hARes[b]); *max_err = err > *max_err ? err : *max_err; } } } } template void syevdx_heevdx_getPerfData(const hipsolverHandle_t handle, const hipsolverEigMode_t evect, const hipsolverEigRange_t erange, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, const S vl, const S vu, const int il, const int iu, Ih& hNevRes, Sd& dW, const int stW, Td& dWork, const int lwork, Id& dinfo, const int bc, Th& hA, Ih& hNev, Sh& hW, Ih& hinfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { constexpr bool COMPLEX = is_complex; int size_work = !COMPLEX ? 35 * n : 33 * n; int size_rwork = !COMPLEX ? 
0 : 7 * n; int size_iwork = 5 * n; std::vector work(size_work); std::vector rwork(size_rwork); std::vector iwork(size_iwork); std::vector A; std::vector Z(lda * n); std::vector ifail(n); S abstol = 2 * get_safemin(); if(!perf) { syevdx_heevdx_initData(handle, evect, n, dA, lda, bc, hA, A, 0); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_syevx_heevx(evect, erange, uplo, n, hA[b], lda, vl, vu, il, iu, abstol, hNev[b], hW[b], Z.data(), lda, work.data(), size_work, rwork.data(), iwork.data(), ifail.data(), hinfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } syevdx_heevdx_initData(handle, evect, n, dA, lda, bc, hA, A, 0); // cold calls for(int iter = 0; iter < 2; iter++) { syevdx_heevdx_initData(handle, evect, n, dA, lda, bc, hA, A, 0); CHECK_ROCBLAS_ERROR(hipsolver_syevdx_heevdx(API, handle, evect, erange, uplo, n, dA.data(), lda, stA, vl, vu, il, iu, hNevRes.data(), dW.data(), stW, dWork.data(), lwork, dinfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { syevdx_heevdx_initData(handle, evect, n, dA, lda, bc, hA, A, 0); start = get_time_us_sync(stream); hipsolver_syevdx_heevdx(API, handle, evect, erange, uplo, n, dA.data(), lda, stA, vl, vu, il, iu, hNevRes.data(), dW.data(), stW, dWork.data(), lwork, dinfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_syevdx_heevdx(Arguments& argus) { using S = decltype(std::real(T{})); // get arguments hipsolver_local_handle handle; char evectC = argus.get("jobz"); char erangeC = argus.get("range"); char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); int stA = argus.get("strideA", lda * n); int stW = argus.get("strideW", n); S vl = S(argus.get("vl", 0)); S vu = S(argus.get("vu", erangeC == 'V' ? 
1 : 0)); int il = argus.get("il", erangeC == 'I' ? 1 : 0); int iu = argus.get("iu", erangeC == 'I' ? 1 : 0); hipsolverEigMode_t evect = char2hipsolver_evect(evectC); hipsolverEigRange_t erange = char2hipsolver_erange(erangeC); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; // determine sizes size_t size_A = size_t(lda) * n; size_t size_W = n; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; size_t size_WRes = (argus.unit_check || argus.norm_check) ? size_W : 0; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || bc < 0 || (erange == HIPSOLVER_EIG_RANGE_V && vl >= vu) || (erange == HIPSOLVER_EIG_RANGE_I && (il < 1 || iu < 0)) || (erange == HIPSOLVER_EIG_RANGE_I && (iu > n || (n > 0 && il > iu)))); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, // handle, // evect, // erange, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // vl, // vu, // il, // iu, // (int*)nullptr, // (S*)nullptr, // stW, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_syevdx_heevdx(API, handle, evect, erange, uplo, n, (T*)nullptr, lda, stA, vl, vu, il, iu, (int*)nullptr, (S*)nullptr, stW, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_Work; hipsolver_syevdx_heevdx_bufferSize(API, handle, evect, erange, uplo, n, (T*)nullptr, lda, vl, vu, il, iu, (int*)nullptr, (S*)nullptr, &size_Work); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_Work); return; } // memory allocations (all cases) // host host_strided_batch_vector hNev(1, 1, 1, bc); host_strided_batch_vector hNevRes(1, 1, 1, bc); host_strided_batch_vector hW(size_W, 1, stW, bc); host_strided_batch_vector 
hinfo(1, 1, 1, bc); host_strided_batch_vector hinfoRes(1, 1, 1, bc); host_strided_batch_vector hWres(size_WRes, 1, stW, bc); // device device_strided_batch_vector dW(size_W, 1, stW, bc); device_strided_batch_vector dinfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_Work, 1, size_Work, bc); if(size_W) CHECK_HIP_ERROR(dW.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); if(size_Work) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hARes(size_ARes, 1, bc); // device_batch_vector dA(size_A, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // { // syevdx_heevdx_getError(handle, // evect, // erange, // uplo, // n, // dA, // lda, // stA, // vl, // vu, // il, // iu, // hNevRes, // dW, // stW, // dWork, // size_Work, // dinfo, // bc, // hA, // hARes, // hNev, // hW, // hWres, // hinfo, // hinfoRes, // &max_error); // } // // collect performance data // if(argus.timing) // { // syevdx_heevdx_getPerfData(handle, // evect, // erange, // uplo, // n, // dA, // lda, // stA, // vl, // vu, // il, // iu, // hNevRes, // dW, // stW, // dWork, // size_Work, // dinfo, // bc, // hA, // hNev, // hW, // hinfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); // } } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stA, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); // check computations if(argus.unit_check || argus.norm_check) { syevdx_heevdx_getError(handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, il, iu, hNevRes, dW, stW, dWork, size_Work, dinfo, bc, hA, hARes, hNev, hW, hWres, hinfo, hinfoRes, &max_error); } // collect performance data if(argus.timing) { syevdx_heevdx_getPerfData(handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, il, iu, hNevRes, dW, stW, dWork, 
size_Work, dinfo, bc, hA, hNev, hW, hinfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } } // validate results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("jobz", "range", "uplo", "n", "lda", "vl", "vu", "il", "iu", "strideW", "batch_c"); rocsolver_bench_output(evectC, erangeC, uploC, n, lda, vl, vu, il, iu, stW, bc); } else if(STRIDED) { rocsolver_bench_output("jobz", "range", "uplo", "n", "lda", "strideA", "vl", "vu", "il", "iu", "strideW", "batch_c"); rocsolver_bench_output( evectC, erangeC, uploC, n, lda, stA, vl, vu, il, iu, stW, bc); } else { rocsolver_bench_output("jobz", "range", "uplo", "n", "lda", "vl", "vu", "il", "iu"); rocsolver_bench_output(evectC, erangeC, uploC, n, lda, vl, vu, il, iu); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_syevj_heevj.hpp000066400000000000000000001153661436107207300240650ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// Calls hipsolver_syevj_heevj repeatedly with exactly one invalid argument per
// call and checks, through EXPECT_ROCBLAS_STATUS, that the expected status is
// reported. All remaining arguments are forwarded unchanged from the caller.
//
//   argument replaced                      expected status
//   -------------------------------------  --------------------------------
//   handle -> nullptr                      HIPSOLVER_STATUS_NOT_INITIALIZED
//   evect  -> hipsolverEigMode_t(-1)       HIPSOLVER_STATUS_INVALID_ENUM
//   uplo   -> hipsolverFillMode_t(-1)      HIPSOLVER_STATUS_INVALID_ENUM
//   dA / dD / dinfo -> nullptr             HIPSOLVER_STATUS_INVALID_VALUE
//
// dWork is never replaced by a null pointer here.
//
// The entire body sits inside the platform guard below, so when neither
// __HIP_PLATFORM_HCC__ nor __HIP_PLATFORM_AMD__ is defined this function is a
// no-op.
//
// NOTE(review): the template parameter list is not visible in this view; API,
// STRIDED, T, S and U are used below as if they were template parameters --
// confirm against the original header.
template void syevj_heevj_checkBadArgs(const hipsolverHandle_t    handle,
                                       const hipsolverEigMode_t   evect,
                                       const hipsolverFillMode_t  uplo,
                                       const int                  n,
                                       T                          dA,
                                       const int                  lda,
                                       const int                  stA,
                                       S                          dD,
                                       const int                  stD,
                                       T                          dWork,
                                       const int                  lwork,
                                       U                          dinfo,
                                       const hipsolverSyevjInfo_t params,
                                       const int                  bc)
{
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
    // handle: a null handle must be rejected
    EXPECT_ROCBLAS_STATUS(
        hipsolver_syevj_heevj(API, STRIDED, nullptr, evect, uplo, n, dA, lda, stA, dD, stD,
                              dWork, lwork, dinfo, params, bc),
        HIPSOLVER_STATUS_NOT_INITIALIZED);

    // values: out-of-range enum values for evect and uplo
    EXPECT_ROCBLAS_STATUS(
        hipsolver_syevj_heevj(API, STRIDED, handle, hipsolverEigMode_t(-1), uplo, n, dA, lda,
                              stA, dD, stD, dWork, lwork, dinfo, params, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_syevj_heevj(API, STRIDED, handle, evect, hipsolverFillMode_t(-1), n, dA, lda,
                              stA, dD, stD, dWork, lwork, dinfo, params, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);

    // pointers: null dA, null dD and null dinfo, one at a time
    EXPECT_ROCBLAS_STATUS(
        hipsolver_syevj_heevj(API, STRIDED, handle, evect, uplo, n, (T) nullptr, lda, stA, dD,
                              stD, dWork, lwork, dinfo, params, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_syevj_heevj(API, STRIDED, handle, evect, uplo, n, dA, lda, stA, (S) nullptr,
                              stD, dWork, lwork, dinfo, params, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_syevj_heevj(API, STRIDED, handle, evect, uplo, n, dA, lda, stA, dD, stD,
                              dWork, lwork, (U) nullptr, params, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
#endif
}
// Prepares the input matrices for a syevj/heevj test run.
//
// When CPU is true:
//   - hA is filled by rocblas_init (the meaning of its boolean flag is not
//     visible here);
//   - for each of the bc matrices, 400 is added to every diagonal entry and 4 is
//     subtracted from every off-diagonal entry of the leading n x n part
//     (element (i, j) lives at index i + j * lda);
//   - if test is true, that n x n part is also copied into the flat vector A,
//     with batch b starting at offset b * lda * n. A must therefore hold at
//     least lda * n * bc elements whenever test is true; with test false A is
//     never touched.
// When GPU is true, hA is then copied to dA via dA.transfer_from.
//
// handle and evect are accepted but not read in this function.
//
// NOTE(review): the index b * lda * n is computed in int; confirm the test
// sizes cannot overflow it.
// NOTE(review): the template parameter list and the element type of
// std::vector are not visible in this view; CPU and GPU are used as if they were
// template parameters -- confirm against the original header.
template void syevj_heevj_initData(const hipsolverHandle_t  handle,
                                   const hipsolverEigMode_t evect,
                                   const int                n,
                                   Td&                      dA,
                                   const int                lda,
                                   const int                bc,
                                   Th&                      hA,
                                   std::vector&             A,
                                   bool                     test = true)
{
    if(CPU)
    {
        rocblas_init(hA, true);

        // scale A to avoid singularities: large diagonal, shifted off-diagonal
        for(int b = 0; b < bc; ++b)
        {
            for(int i = 0; i < n; i++)
            {
                for(int j = 0; j < n; j++)
                {
                    if(i == j)
                        hA[b][i + j * lda] += 400;
                    else
                        hA[b][i + j * lda] -= 4;
                }
            }

            // make copy of original data to test vectors if required
            if(test)
            {
                for(int i = 0; i < n; i++)
                {
                    for(int j = 0; j < n; j++)
                        A[b * lda * n + i + j * lda] = hA[b][i + j * lda];
                }
            }
        }
    }

    if(GPU)
    {
        // now copy to the GPU
        CHECK_HIP_ERROR(dA.transfer_from(hA));
    }
}
// Collects timing data for syevj/heevj.
//
// Outputs:
//   *cpu_time_used  elapsed time of the reference cblas_syevd_heevd loop over all
//                   bc matrices; written only when perf is false.
//   *gpu_time_used  accumulated time of hot_calls timed hipsolver_syevj_heevj
//                   calls, divided by hot_calls at the end.
//
// Sequence: (optional) CPU reference timing, one re-initialization, two untimed
// "cold" GPU calls, then hot_calls timed GPU calls. The input is re-initialized
// with syevj_heevj_initData before every solver call; that happens outside the
// timed region of the hot loop.
//
// NOTE(review): *gpu_time_used is only ever updated with += and /=, so the
// caller is expected to pass it in already zeroed -- confirm at the call site.
// NOTE(review): hot_calls == 0 makes the final division a division by zero.
// NOTE(review): the template parameter list, the element types of the
// std::vector locals and the template arguments of is_complex and
// syevj_heevj_initData are not visible in this view -- confirm against the
// original header.
template void syevj_heevj_getPerfData(const hipsolverHandle_t   handle,
                                      const hipsolverEigMode_t  evect,
                                      const hipsolverFillMode_t uplo,
                                      const int                 n,
                                      Td&                       dA,
                                      const int                 lda,
                                      const int                 stA,
                                      Sd&                       dD,
                                      const int                 stD,
                                      Td&                       dWork,
                                      const int                 lwork,
                                      Id&                       dinfo,
                                      hipsolverSyevjInfo_t      params,
                                      const int                 bc,
                                      Th&                       hA,
                                      Sh&                       hD,
                                      Ih&                       hinfo,
                                      double*                   gpu_time_used,
                                      double*                   cpu_time_used,
                                      const int                 hot_calls,
                                      const bool                perf)
{
    constexpr bool COMPLEX = is_complex;
    using S                = decltype(std::real(T{}));

    // lengths of the scratch arrays handed to cblas_syevd_heevd; they depend on
    // whether T is complex and on whether eigenvectors are requested
    int sizeE, ltwork;
    if(!COMPLEX)
    {
        sizeE  = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 2 * n + 1 : 1 + 6 * n + 2 * n * n);
        ltwork = 0;
    }
    else
    {
        sizeE  = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n : 1 + 5 * n + 2 * n * n);
        ltwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n + 1 : 2 * n + n * n);
    }
    int liwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 1 : 3 + 5 * n);

    std::vector work(ltwork);
    std::vector hE(sizeE);
    std::vector iwork(liwork);
    // A stays empty: every initData call below passes 0 as its last argument
    std::vector A;

    if(!perf)
    {
        syevj_heevj_initData(handle, evect, n, dA, lda, bc, hA, A, 0);

        // cpu-lapack performance (only if not in perf mode)
        *cpu_time_used = get_time_us_no_sync();
        for(int b = 0; b < bc; ++b)
            cblas_syevd_heevd(evect, uplo, n, hA[b], lda, hD[b], work.data(), ltwork, hE.data(),
                              sizeE, iwork.data(), liwork, hinfo[b]);
        *cpu_time_used = get_time_us_no_sync() - *cpu_time_used;
    }

    syevj_heevj_initData(handle, evect, n, dA, lda, bc, hA, A, 0);

    // cold calls: two untimed runs whose status is checked
    for(int iter = 0; iter < 2; iter++)
    {
        syevj_heevj_initData(handle, evect, n, dA, lda, bc, hA, A, 0);

        CHECK_ROCBLAS_ERROR(hipsolver_syevj_heevj(API, STRIDED, handle, evect, uplo, n,
                                                  dA.data(), lda, stA, dD.data(), stD,
                                                  dWork.data(), lwork, dinfo.data(), params,
                                                  bc));
    }

    // gpu-lapack performance
    hipStream_t stream;
    CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream));
    double start;

    for(int iter = 0; iter < hot_calls; iter++)
    {
        // re-initialize the input before starting the clock
        syevj_heevj_initData(handle, evect, n, dA, lda, bc, hA, A, 0);

        // NOTE(review): get_time_us_sync presumably synchronizes the stream
        // before reading the clock -- confirm in its definition.
        start = get_time_us_sync(stream);
        // NOTE(review): unlike the cold calls, the status returned by this
        // timed call is not checked.
        hipsolver_syevj_heevj(API, STRIDED, handle, evect, uplo, n, dA.data(), lda, stA,
                              dD.data(), stD, dWork.data(), lwork, dinfo.data(), params, bc);
        *gpu_time_used += get_time_us_sync(stream) - start;
    }
    *gpu_time_used /= hot_calls;
}
hipsolverXsyevjSetTolerance(params, abstol); hipsolverXsyevjSetMaxSweeps(params, max_sweeps); hipsolverXsyevjSetSortEig(params, sort_eig); hipsolverEigMode_t evect = char2hipsolver_evect(evectC); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; // determine sizes size_t size_A = size_t(lda) * n; size_t size_D = n; size_t size_Ares = (argus.unit_check || argus.norm_check) ? size_A : 0; size_t size_Dres = (argus.unit_check || argus.norm_check) ? size_D : 0; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_syevj_heevj(API, // STRIDED, // handle, // evect, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (S*)nullptr, // stD, // (T*)nullptr, // 0, // (int*)nullptr, // params, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_syevj_heevj(API, STRIDED, handle, evect, uplo, n, (T*)nullptr, lda, stA, (S*)nullptr, stD, (T*)nullptr, 0, (int*)nullptr, params, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_syevj_heevj_bufferSize( API, STRIDED, handle, evect, uplo, n, (T*)nullptr, lda, (S*)nullptr, &size_W, params, bc); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations (all cases) // host host_strided_batch_vector hResidualRes(1, 1, 1, bc); host_strided_batch_vector hSweepsRes(1, 1, 1, bc); host_strided_batch_vector hD(size_D, 1, stD, bc); host_strided_batch_vector hinfo(1, 1, 1, bc); host_strided_batch_vector hinfoRes(1, 1, 1, bc); host_strided_batch_vector hDres(size_Dres, 1, stD, bc); // device device_strided_batch_vector dD(size_D, 1, stD, bc); device_strided_batch_vector dinfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); 
if(size_D) CHECK_HIP_ERROR(dD.memcheck()); CHECK_HIP_ERROR(dinfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hAres(size_Ares, 1, bc); // device_batch_vector dA(size_A, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // { // syevj_heevj_getError(handle, // evect, // uplo, // n, // dA, // lda, // stA, // dD, // stD, // dWork, // size_W, // dinfo, // params, // abstol, // max_sweeps, // sort_eig, // bc, // hA, // hAres, // hD, // hDres, // hinfo, // hinfoRes, // hResidualRes, // hSweepsRes, // &max_error); // } // // collect performance data // if(argus.timing) // { // syevj_heevj_getPerfData(handle, // evect, // uplo, // n, // dA, // lda, // stA, // dD, // stD, // dWork, // size_W, // dinfo, // params, // bc, // hA, // hD, // hinfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); // } } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hAres(size_Ares, 1, stA, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); // check computations if(argus.unit_check || argus.norm_check) { syevj_heevj_getError(handle, evect, uplo, n, dA, lda, stA, dD, stD, dWork, size_W, dinfo, params, abstol, max_sweeps, sort_eig, bc, hA, hAres, hD, hDres, hinfo, hinfoRes, hResidualRes, hSweepsRes, &max_error); } // collect performance data if(argus.timing) { syevj_heevj_getPerfData(handle, evect, uplo, n, dA, lda, stA, dD, stD, dWork, size_W, dinfo, params, bc, hA, hD, hinfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } } // validate results for rocsolver-test // using 4 * n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, 4 * n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << 
"\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("jobz", "uplo", "n", "lda", "strideD", "tolerance", "max_sweeps", "sort_eig", "batch_c"); rocsolver_bench_output( evectC, uploC, n, lda, stD, abstol, max_sweeps, sort_eig, bc); } else if(STRIDED) { rocsolver_bench_output("jobz", "uplo", "n", "lda", "strideA", "strideD", "tolerance", "max_sweeps", "sort_eig", "batch_c"); rocsolver_bench_output( evectC, uploC, n, lda, stA, stD, abstol, max_sweeps, sort_eig, bc); } else { rocsolver_bench_output( "jobz", "uplo", "n", "lda", "tolerance", "max_sweeps", "sort_eig"); rocsolver_bench_output(evectC, uploC, n, lda, abstol, max_sweeps, sort_eig); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_sygvd_hegvd.hpp000066400000000000000000001375621436107207300240570ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// Calls hipsolver_sygvd_hegvd repeatedly with exactly one invalid argument per
// call and checks, through EXPECT_ROCBLAS_STATUS, that the expected status is
// reported. All remaining arguments are forwarded unchanged from the caller.
//
//   argument replaced                      expected status
//   -------------------------------------  --------------------------------
//   handle -> nullptr                      HIPSOLVER_STATUS_NOT_INITIALIZED
//   itype  -> hipsolverEigType_t(-1)       HIPSOLVER_STATUS_INVALID_ENUM
//   evect  -> hipsolverEigMode_t(-1)       HIPSOLVER_STATUS_INVALID_ENUM
//   uplo   -> hipsolverFillMode_t(-1)      HIPSOLVER_STATUS_INVALID_ENUM
//   dA / dB / dD / dInfo -> nullptr        HIPSOLVER_STATUS_INVALID_VALUE
//
// The handle and enum checks always run. The null-pointer checks are compiled
// only when __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_AMD__ is defined. dWork is
// never replaced by a null pointer here.
//
// NOTE(review): the template parameter list is not visible in this view;
// FORTRAN, T and U are used below as if they were template parameters --
// confirm against the original header.
template void sygvd_hegvd_checkBadArgs(const hipsolverHandle_t   handle,
                                       const hipsolverEigType_t  itype,
                                       const hipsolverEigMode_t  evect,
                                       const hipsolverFillMode_t uplo,
                                       const int                 n,
                                       T                         dA,
                                       const int                 lda,
                                       const int                 stA,
                                       T                         dB,
                                       const int                 ldb,
                                       const int                 stB,
                                       U                         dD,
                                       const int                 stD,
                                       T                         dWork,
                                       const int                 lwork,
                                       int*                      dInfo,
                                       const int                 bc)
{
    // handle: a null handle must be rejected
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvd_hegvd(FORTRAN, nullptr, itype, evect, uplo, n, dA, lda, stA, dB, ldb,
                              stB, dD, stD, dWork, lwork, dInfo, bc),
        HIPSOLVER_STATUS_NOT_INITIALIZED);

    // values: out-of-range enum values for itype, evect and uplo
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvd_hegvd(FORTRAN, handle, hipsolverEigType_t(-1), evect, uplo, n, dA, lda,
                              stA, dB, ldb, stB, dD, stD, dWork, lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvd_hegvd(FORTRAN, handle, itype, hipsolverEigMode_t(-1), uplo, n, dA, lda,
                              stA, dB, ldb, stB, dD, stD, dWork, lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, hipsolverFillMode_t(-1), n, dA,
                              lda, stA, dB, ldb, stB, dD, stD, dWork, lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);

#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
    // pointers: null dA, dB, dD and dInfo, one at a time
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, uplo, n, (T) nullptr, lda, stA, dB,
                              ldb, stB, dD, stD, dWork, lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, uplo, n, dA, lda, stA, (T) nullptr,
                              ldb, stB, dD, stD, dWork, lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, uplo, n, dA, lda, stA, dB, ldb,
                              stB, (U) nullptr, stD, dWork, lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, uplo, n, dA, lda, stA, dB, ldb,
                              stB, dD, stD, dWork, lwork, (int*)nullptr, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
#endif
}
// Prepares the input matrix pair (A, B) for a sygvd/hegvd test run.
//
// When CPU is true:
//   - hA and hB are filled by rocblas_init (the meaning of its boolean flag is
//     not visible here);
//   - for each of the bc batch instances, every diagonal entry of hA and hB is
//     replaced by its real part plus 400, and 4 is subtracted from every
//     off-diagonal entry of hA (off-diagonal entries of hB are left as
//     initialized);
//   - if test is true and eigenvectors are requested (evect is not
//     HIPSOLVER_EIG_MODE_NOVECTOR), the n x n inputs are saved into A and B for
//     later verification. For HIPSOLVER_EIG_TYPE_3 the two are stored swapped:
//     A receives hB and B receives hA.
// When GPU is true, hA and hB are then copied to dA and dB.
//
// Element (i, j) of a matrix is addressed as i + j * ld.
//
// handle, stA, stB and singular are accepted but not read in this function.
// NOTE(review): since singular is ignored, no matrix B is made
// non-positive-definite here -- confirm this is intended.
// NOTE(review): the template parameter list and the element type of
// host_strided_batch_vector are not visible in this view; CPU and GPU are used
// as if they were template parameters -- confirm against the original header.
template void sygvd_hegvd_initData(const hipsolverHandle_t    handle,
                                   const hipsolverEigType_t   itype,
                                   const hipsolverEigMode_t   evect,
                                   const int                  n,
                                   Td&                        dA,
                                   const int                  lda,
                                   const int                  stA,
                                   Td&                        dB,
                                   const int                  ldb,
                                   const int                  stB,
                                   const int                  bc,
                                   Th&                        hA,
                                   Th&                        hB,
                                   host_strided_batch_vector& A,
                                   host_strided_batch_vector& B,
                                   const bool                 test,
                                   const bool                 singular)
{
    if(CPU)
    {
        rocblas_init(hA, true);
        rocblas_init(hB, false);

        for(int b = 0; b < bc; ++b)
        {
            for(int i = 0; i < n; i++)
            {
                for(int j = 0; j < n; j++)
                {
                    if(i == j)
                    {
                        // diagonal: drop any imaginary part and add a large shift
                        hA[b][i + j * lda] = std::real(hA[b][i + j * lda]) + 400;
                        hB[b][i + j * ldb] = std::real(hB[b][i + j * ldb]) + 400;
                    }
                    else
                    {
                        hA[b][i + j * lda] -= 4;
                    }
                }
            }

            // store A and B for testing purposes
            if(test && evect != HIPSOLVER_EIG_MODE_NOVECTOR)
            {
                for(int i = 0; i < n; i++)
                {
                    for(int j = 0; j < n; j++)
                    {
                        if(itype != HIPSOLVER_EIG_TYPE_3)
                        {
                            A[b][i + j * lda] = hA[b][i + j * lda];
                            B[b][i + j * ldb] = hB[b][i + j * ldb];
                        }
                        else
                        {
                            // type 3: the saved copies are swapped (A <- hB, B <- hA)
                            A[b][i + j * lda] = hB[b][i + j * ldb];
                            B[b][i + j * ldb] = hA[b][i + j * lda];
                        }
                    }
                }
            }
        }
    }

    if(GPU)
    {
        // now copy data to the GPU
        CHECK_HIP_ERROR(dA.transfer_from(hA));
        CHECK_HIP_ERROR(dB.transfer_from(hB));
    }
}
n : 1 + 5 * n + 2 * n * n); ltwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n + 1 : 2 * n + n * n); } int liwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 1 : 3 + 5 * n); std::vector work(ltwork); std::vector rwork(lrwork); std::vector iwork(liwork); host_strided_batch_vector A(lda * n, 1, lda * n, bc); host_strided_batch_vector B(ldb * n, 1, ldb * n, bc); // input data initialization sygvd_hegvd_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, true, singular); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, uplo, n, dA.data(), lda, stA, dB.data(), ldb, stB, dD.data(), stD, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hDRes.transfer_from(dD)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); if(evect != HIPSOLVER_EIG_MODE_NOVECTOR) CHECK_HIP_ERROR(hARes.transfer_from(dA)); // CPU lapack for(int b = 0; b < bc; ++b) { cblas_sygvd_hegvd(itype, evect, uplo, n, hA[b], lda, hB[b], ldb, hD[b], work.data(), ltwork, rwork.data(), lrwork, iwork.data(), liwork, hInfo[b]); } // (We expect the used input matrices to always converge. Testing // implicitly the equivalent non-converged matrix is very complicated and it boils // down to essentially run the algorithm again and until convergence is achieved. // We do test with indefinite matrices B). // check info for non-convergence and/or positive-definiteness *max_err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) *max_err += 1; double err; if(evect == HIPSOLVER_EIG_MODE_NOVECTOR) { // only eigenvalues needed; can compare with LAPACK // error is ||hD - hDRes|| / ||hD|| // using frobenius norm for(int b = 0; b < bc; ++b) { if(hInfoRes[b][0] == 0) { err = norm_error('F', 1, n, 1, hD[b], hDRes[b]); *max_err = err > *max_err ? 
// Collects timing data for sygvd/hegvd.
//
// Outputs:
//   *cpu_time_used  elapsed time of the reference cblas_sygvd_hegvd loop over all
//                   bc batch instances; written only when perf is false.
//   *gpu_time_used  accumulated time of hot_calls timed hipsolver_sygvd_hegvd
//                   calls, divided by hot_calls at the end.
//
// Sequence: (optional) CPU reference timing, one re-initialization, two untimed
// "cold" GPU calls, then hot_calls timed GPU calls. The inputs are
// re-initialized with sygvd_hegvd_initData before every solver call; that
// happens outside the timed region of the hot loop.
//
// NOTE(review): *gpu_time_used is only ever updated with += and /=, so the
// caller is expected to pass it in already zeroed -- confirm at the call site.
// NOTE(review): hot_calls == 0 makes the final division a division by zero.
// NOTE(review): the template parameter list, the element types of the
// std::vector / host_strided_batch_vector locals and the template arguments of
// is_complex and sygvd_hegvd_initData are not visible in this view -- confirm
// against the original header.
template void sygvd_hegvd_getPerfData(const hipsolverHandle_t   handle,
                                      const hipsolverEigType_t  itype,
                                      const hipsolverEigMode_t  evect,
                                      const hipsolverFillMode_t uplo,
                                      const int                 n,
                                      Td&                       dA,
                                      const int                 lda,
                                      const int                 stA,
                                      Td&                       dB,
                                      const int                 ldb,
                                      const int                 stB,
                                      Ud&                       dD,
                                      const int                 stD,
                                      Td&                       dWork,
                                      const int                 lwork,
                                      Vd&                       dInfo,
                                      const int                 bc,
                                      Th&                       hA,
                                      Th&                       hB,
                                      Uh&                       hD,
                                      Vh&                       hInfo,
                                      double*                   gpu_time_used,
                                      double*                   cpu_time_used,
                                      const int                 hot_calls,
                                      const bool                perf,
                                      const bool                singular)
{
    constexpr bool COMPLEX = is_complex;
    using S                = decltype(std::real(T{}));

    // lengths of the scratch arrays handed to cblas_sygvd_hegvd; they depend on
    // whether T is complex and on whether eigenvectors are requested
    int lrwork, ltwork;
    if(!COMPLEX)
    {
        lrwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 2 * n + 1 : 1 + 6 * n + 2 * n * n);
        ltwork = 0;
    }
    else
    {
        lrwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n : 1 + 5 * n + 2 * n * n);
        ltwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n + 1 : 2 * n + n * n);
    }
    int liwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 1 : 3 + 5 * n);

    std::vector work(ltwork);
    std::vector rwork(lrwork);
    std::vector iwork(liwork);
    // one-element placeholders: every initData call below passes test = false,
    // so A and B are never written
    host_strided_batch_vector A(1, 1, 1, 1);
    host_strided_batch_vector B(1, 1, 1, 1);

    if(!perf)
    {
        sygvd_hegvd_initData(
            handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false, singular);

        // cpu-lapack performance (only if not in perf mode)
        *cpu_time_used = get_time_us_no_sync();
        for(int b = 0; b < bc; ++b)
        {
            cblas_sygvd_hegvd(itype, evect, uplo, n, hA[b], lda, hB[b], ldb, hD[b], work.data(),
                              ltwork, rwork.data(), lrwork, iwork.data(), liwork, hInfo[b]);
        }
        *cpu_time_used = get_time_us_no_sync() - *cpu_time_used;
    }

    sygvd_hegvd_initData(
        handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false, singular);

    // cold calls: two untimed runs whose status is checked
    for(int iter = 0; iter < 2; iter++)
    {
        sygvd_hegvd_initData(
            handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false, singular);

        CHECK_ROCBLAS_ERROR(hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, uplo, n,
                                                  dA.data(), lda, stA, dB.data(), ldb, stB,
                                                  dD.data(), stD, dWork.data(), lwork,
                                                  dInfo.data(), bc));
    }

    // gpu-lapack performance
    hipStream_t stream;
    CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream));
    double start;

    for(int iter = 0; iter < hot_calls; iter++)
    {
        // re-initialize the inputs before starting the clock
        sygvd_hegvd_initData(
            handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false, singular);

        // NOTE(review): get_time_us_sync presumably synchronizes the stream
        // before reading the clock -- confirm in its definition.
        start = get_time_us_sync(stream);
        // NOTE(review): unlike the cold calls, the status returned by this
        // timed call is not checked.
        hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, uplo, n, dA.data(), lda, stA,
                              dB.data(), ldb, stB, dD.data(), stD, dWork.data(), lwork,
                              dInfo.data(), bc);
        *gpu_time_used += get_time_us_sync(stream) - start;
    }
    *gpu_time_used /= hot_calls;
}
argus.get("jobz"); char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); int ldb = argus.get("ldb", n); int stA = argus.get("strideA", lda * n); int stB = argus.get("strideB", ldb * n); int stD = argus.get("strideD", n); hipsolverEigType_t itype = char2hipsolver_eform(itypeC); hipsolverEigMode_t evect = char2hipsolver_evect(evectC); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; int stARes = (argus.unit_check || argus.norm_check) ? stA : 0; int stDRes = (argus.unit_check || argus.norm_check) ? stD : 0; // determine sizes size_t size_A = size_t(lda) * n; size_t size_B = size_t(ldb) * n; size_t size_D = size_t(n); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; size_t size_DRes = (argus.unit_check || argus.norm_check) ? size_D : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || ldb < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_sygvd_hegvd(FORTRAN, // handle, // itype, // evect, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (T* const*)nullptr, // ldb, // stB, // (S*)nullptr, // stD, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_sygvd_hegvd(FORTRAN, handle, itype, evect, uplo, n, (T*)nullptr, lda, stA, (T*)nullptr, ldb, stB, (S*)nullptr, stD, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_sygvd_hegvd_bufferSize(FORTRAN, handle, itype, evect, uplo, n, (T*)nullptr, lda, (T*)nullptr, ldb, (S*)nullptr, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector 
hARes(size_ARes, 1, bc); // host_batch_vector hB(size_B, 1, bc); // host_strided_batch_vector hD(size_D, 1, stD, bc); // host_strided_batch_vector hDRes(size_DRes, 1, stDRes, bc); // host_strided_batch_vector hInfo(1, 1, 1, bc); // host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_batch_vector dB(size_B, 1, bc); // device_strided_batch_vector dD(size_D, 1, stD, bc); // device_strided_batch_vector dInfo(1, 1, 1, bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // if(size_B) // CHECK_HIP_ERROR(dB.memcheck()); // if(size_D) // CHECK_HIP_ERROR(dD.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // sygvd_hegvd_getError(handle, // itype, // evect, // uplo, // n, // dA, // lda, // stA, // dB, // ldb, // stB, // dD, // stD, // dWork, // size_W, // dInfo, // bc, // hA, // hARes, // hB, // hD, // hDRes, // hInfo, // hInfoRes, // &max_error, // argus.singular); // // collect performance data // if(argus.timing) // sygvd_hegvd_getPerfData(handle, // itype, // evect, // uplo, // n, // dA, // lda, // stA, // dB, // ldb, // stB, // dD, // stD, // dWork, // size_W, // dInfo, // bc, // hA, // hB, // hD, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf, // argus.singular); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); host_strided_batch_vector hB(size_B, 1, stB, bc); host_strided_batch_vector hD(size_D, 1, stD, bc); host_strided_batch_vector hDRes(size_DRes, 1, stDRes, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dB(size_B, 1, stB, bc); device_strided_batch_vector dD(size_D, 1, stD, bc); 
device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_B) CHECK_HIP_ERROR(dB.memcheck()); if(size_D) CHECK_HIP_ERROR(dD.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) sygvd_hegvd_getError(handle, itype, evect, uplo, n, dA, lda, stA, dB, ldb, stB, dD, stD, dWork, size_W, dInfo, bc, hA, hARes, hB, hD, hDRes, hInfo, hInfoRes, &max_error, argus.singular); // collect performance data if(argus.timing) sygvd_hegvd_getPerfData(handle, itype, evect, uplo, n, dA, lda, stA, dB, ldb, stB, dD, stD, dWork, size_W, dInfo, bc, hA, hB, hD, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf, argus.singular); } // validate results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output( "itype", "evect", "uplo", "n", "lda", "ldb", "strideD", "batch_c"); rocsolver_bench_output(itypeC, evectC, uploC, n, lda, ldb, stD, bc); } else if(STRIDED) { rocsolver_bench_output("itype", "evect", "uplo", "n", "lda", "ldb", "strideA", "strideB", "strideD", "batch_c"); rocsolver_bench_output(itypeC, evectC, uploC, n, lda, ldb, stA, stB, stD, bc); } else { rocsolver_bench_output("itype", "evect", "uplo", "n", "lda", "ldb"); rocsolver_bench_output(itypeC, evectC, uploC, n, lda, ldb); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, 
gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_sygvdx_hegvdx.hpp000066400000000000000000001775471436107207300244460ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp"
// Bad-argument checks for hipsolver_sygvdx_hegvdx.
//
// Each call below passes exactly one invalid argument and forwards every other argument
// unchanged from the caller, then checks the status the wrapper reports:
//   - a null handle                          -> HIPSOLVER_STATUS_NOT_INITIALIZED
//   - an out-of-range enum value (-1)        -> HIPSOLVER_STATUS_INVALID_ENUM
//   - a null pointer (AMD platform only)     -> HIPSOLVER_STATUS_INVALID_VALUE
//
// NOTE(review): the template parameter list looks stripped in this copy ("template void ...");
// API, T, S and U are presumably template parameters - confirm against the upstream header.
template void sygvdx_hegvdx_checkBadArgs(const hipsolverHandle_t handle,
                                         const hipsolverEigType_t itype,
                                         const hipsolverEigMode_t evect,
                                         const hipsolverEigRange_t erange,
                                         const hipsolverFillMode_t uplo,
                                         const int n,
                                         T dA,
                                         const int lda,
                                         const int stA,
                                         T dB,
                                         const int ldb,
                                         const int stB,
                                         const S vl,
                                         const S vu,
                                         const int il,
                                         const int iu,
                                         int* hNev,
                                         U dW,
                                         const int stW,
                                         T dWork,
                                         const int lwork,
                                         int* dInfo,
                                         const int bc)
{
    // handle: a null handle must be reported as "not initialized"
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, nullptr, itype, evect, erange, uplo, n, dA, lda, stA, dB,
                                ldb, stB, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, dInfo, bc),
        HIPSOLVER_STATUS_NOT_INITIALIZED);

    // values: each enum argument in turn is replaced by the invalid value -1
    // (itype, then evect, then erange, then uplo)
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, hipsolverEigType_t(-1), evect, erange, uplo, n, dA,
                                lda, stA, dB, ldb, stB, vl, vu, il, iu, hNev, dW, stW, dWork,
                                lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, itype, hipsolverEigMode_t(-1), erange, uplo, n, dA,
                                lda, stA, dB, ldb, stB, vl, vu, il, iu, hNev, dW, stW, dWork,
                                lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, itype, evect, hipsolverEigRange_t(-1), uplo, n, dA,
                                lda, stA, dB, ldb, stB, vl, vu, il, iu, hNev, dW, stW, dWork,
                                lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, hipsolverFillMode_t(-1), n, dA,
                                lda, stA, dB, ldb, stB, vl, vu, il, iu, hNev, dW, stW, dWork,
                                lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);

#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
    // pointers: these checks are compiled only for the AMD platform; each replaces one pointer
    // argument by nullptr (dA, dB, hNev, dW, dInfo in that order)
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, (T) nullptr, lda, stA,
                                dB, ldb, stB, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, dInfo,
                                bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, dA, lda, stA,
                                (T) nullptr, ldb, stB, vl, vu, il, iu, hNev, dW, stW, dWork,
                                lwork, dInfo, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb,
                                stB, vl, vu, il, iu, (int*)nullptr, dW, stW, dWork, lwork, dInfo,
                                bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb,
                                stB, vl, vu, il, iu, hNev, (U) nullptr, stW, dWork, lwork, dInfo,
                                bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb,
                                stB, vl, vu, il, iu, hNev, dW, stW, dWork, lwork, (int*)nullptr,
                                bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
#endif
}
template void testing_sygvdx_hegvdx_bad_arg() { using S = decltype(std::real(T{})); // safe arguments hipsolver_local_handle handle; int n = 1; int lda = 1; int ldb = 1; int ldz = 1; int stA = 1; int stB = 1; int stW = 1; int stE = 1; int stZ = 1; int bc = 1; hipsolverEigType_t itype = HIPSOLVER_EIG_TYPE_1; hipsolverEigMode_t evect = HIPSOLVER_EIG_MODE_VECTOR; hipsolverEigRange_t erange = HIPSOLVER_EIG_RANGE_V; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; S vl = 0.0; S vu = 1.0; int il = 0; int iu = 0; if(BATCHED) { // // memory allocations // host_strided_batch_vector hNev(1, 1, 1, 1); // device_batch_vector dA(1, 1, 1); // device_batch_vector dB(1, 1, 1); // device_batch_vector dZ(1, 1, 1); // device_strided_batch_vector dW(1, 1, 1, 1); // device_strided_batch_vector dE(1, 1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dB.memcheck()); // CHECK_HIP_ERROR(dZ.memcheck()); // CHECK_HIP_ERROR(dW.memcheck()); // CHECK_HIP_ERROR(dE.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // int size_W; //
hipsolver_sygvdx_hegvdx_bufferSize(API, // handle, // itype, // evect, // erange, // uplo, // n, // dA.data(), // lda, // dB.data(), // ldb, // vl, // vu, // il, // iu, // hNev.data(), // dW.data(), // &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // sygvdx_hegvdx_checkBadArgs(handle, // itype, // evect, // erange, // uplo, // n, // dA.data(), // lda, // stA, // dB.data(), // ldb, // stB, // vl, // vu, // il, // iu, // hNev.data(), // dW.data(), // stW, // dWork.data(), // size_W, // dInfo.data(), // bc); } else { // memory allocations host_strided_batch_vector hNev(1, 1, 1, 1); device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dB(1, 1, 1, 1); device_strided_batch_vector dZ(1, 1, 1, 1); device_strided_batch_vector dW(1, 1, 1, 1); device_strided_batch_vector dE(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dZ.memcheck()); CHECK_HIP_ERROR(dW.memcheck()); CHECK_HIP_ERROR(dE.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_sygvdx_hegvdx_bufferSize(API, handle, itype, evect, erange, uplo, n, dA.data(), lda, dB.data(), ldb, vl, vu, il, iu, hNev.data(), dW.data(), &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments sygvdx_hegvdx_checkBadArgs(handle, itype, evect, erange, uplo, n, dA.data(), lda, stA, dB.data(), ldb, stB, vl, vu, il, iu, hNev.data(), dW.data(), stW, dWork.data(), size_W, dInfo.data(), bc); } } template void sygvdx_hegvdx_initData(const hipsolverHandle_t handle, const hipsolverEigType_t itype, const hipsolverEigMode_t evect, const int n, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, const int bc, Th& hA, Th& hB, host_strided_batch_vector& A, host_strided_batch_vector& B, const bool test) { if(CPU) { int info; 
int ldu = n; host_strided_batch_vector U(n * n, 1, n * n, bc); rocblas_init(hA, true); rocblas_init(U, true); for(int b = 0; b < bc; ++b) { // for testing purposes, we start with a reduced matrix M for the standard equivalent problem // with spectrum in a desired range (-20, 20). Then we construct the generalized pair // (A, B) from there. for(int i = 0; i < n; i++) { // scale matrices and set hA = M (symmetric/hermitian), hB = U (upper triangular) for(int j = i; j < n; j++) { if(i == j) { hA[b][i + j * lda] = std::real(hA[b][i + j * lda]) + 10; U[b][i + j * ldu] = std::real(U[b][i + j * ldu]) / 100 + 1; hB[b][i + j * ldb] = U[b][i + j * ldu]; } else { if(j == i + 1) { hA[b][i + j * lda] = (hA[b][i + j * lda] - 5) / 10; hA[b][j + i * lda] = conj(hA[b][i + j * lda]); } else hA[b][j + i * lda] = hA[b][i + j * lda] = 0; U[b][i + j * ldu] = (U[b][i + j * ldu] - 5) / 100; hB[b][i + j * ldb] = U[b][i + j * ldu]; hB[b][j + i * ldb] = 0; U[b][j + i * ldu] = 0; } } if(i == n / 4 || i == n / 2 || i == n - 1 || i == n / 7 || i == n / 5 || i == n / 3) hA[b][i + i * lda] *= -1; } // form B = U' U T one = T(1); cblas_trmm(HIPSOLVER_SIDE_LEFT, HIPSOLVER_FILL_MODE_UPPER, HIPSOLVER_OP_C, 'N', n, n, one, U[b], ldu, hB[b], ldb); if(itype == HIPSOLVER_EIG_TYPE_1) { // form A = U' M U cblas_trmm(HIPSOLVER_SIDE_LEFT, HIPSOLVER_FILL_MODE_UPPER, HIPSOLVER_OP_C, 'N', n, n, one, U[b], ldu, hA[b], lda); cblas_trmm(HIPSOLVER_SIDE_RIGHT, HIPSOLVER_FILL_MODE_UPPER, HIPSOLVER_OP_N, 'N', n, n, one, U[b], ldu, hA[b], lda); } else { // form A = inv(U) M inv(U') cblas_trsm(HIPSOLVER_SIDE_LEFT, HIPSOLVER_FILL_MODE_UPPER, HIPSOLVER_OP_N, 'N', n, n, one, U[b], ldu, hA[b], lda); cblas_trsm(HIPSOLVER_SIDE_RIGHT, HIPSOLVER_FILL_MODE_UPPER, HIPSOLVER_OP_C, 'N', n, n, one, U[b], ldu, hA[b], lda); } // store A and B for testing purposes if(test && evect != HIPSOLVER_EIG_MODE_NOVECTOR) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) { if(itype != HIPSOLVER_EIG_TYPE_3) { A[b][i + j * lda] = hA[b][i + 
j * lda]; B[b][i + j * ldb] = hB[b][i + j * ldb]; } else { A[b][i + j * lda] = hB[b][i + j * ldb]; B[b][i + j * ldb] = hA[b][i + j * lda]; } } } } } } if(GPU) { // now copy data to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); } } template void sygvdx_hegvdx_getError(const hipsolverHandle_t handle, const hipsolverEigType_t itype, const hipsolverEigMode_t evect, const hipsolverEigRange_t erange, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, const S vl, const S vu, const int il, const int iu, Vh& hNevRes, Ud& dW, const int stW, Td& dWork, const int lwork, Vd& dInfo, const int bc, Th& hA, Th& hARes, Th& hB, Vh& hNev, Uh& hW, Uh& hWRes, Vh& hInfo, Vh& hInfoRes, double* max_err) { constexpr bool COMPLEX = is_complex; int size_work = (COMPLEX ? 2 * n : 8 * n); int size_rwork = (COMPLEX ? 7 * n : 0); int size_iwork = 5 * n; std::vector work(size_work); std::vector rwork(size_rwork); std::vector iwork(size_iwork); host_strided_batch_vector A(lda * n, 1, lda * n, bc); host_strided_batch_vector B(ldb * n, 1, ldb * n, bc); std::vector Z(n * n); std::vector ifail(n); // input data initialization sygvdx_hegvdx_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, true); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, dA.data(), lda, stA, dB.data(), ldb, stB, vl, vu, il, iu, hNevRes.data(), dW.data(), stW, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hWRes.transfer_from(dW)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); if(evect != HIPSOLVER_EIG_MODE_NOVECTOR) CHECK_HIP_ERROR(hARes.transfer_from(dA)); // CPU lapack S abstol = 2 * get_safemin(); for(int b = 0; b < bc; ++b) { cblas_sygvx_hegvx(itype, evect, erange, uplo, n, hA[b], lda, hB[b], ldb, vl, vu, il, iu, abstol, hNev[b], hW[b], Z.data(), n, work.data(), size_work, rwork.data(), iwork.data(), 
ifail.data(), hInfo[b]); } // (We expect the used input matrices to always converge. Testing // implicitly the equivalent non-converged matrix is very complicated and it boils // down to essentially run the algorithm again and until convergence is achieved. // We do test with indefinite matrices B). // check info for non-convergence and/or positive-definiteness *max_err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) *max_err += 1; // Check number of returned eigenvalues *max_err = 0; for(int b = 0; b < bc; ++b) if(hNev[b][0] != hNevRes[b][0]) *max_err += 1; double err; for(int b = 0; b < bc; ++b) { if(evect == HIPSOLVER_EIG_MODE_NOVECTOR) { // only eigenvalues needed; can compare with LAPACK // error is ||hW - hWRes|| / ||hW|| // using frobenius norm if(hInfoRes[b][0] == 0) { err = norm_error('F', 1, hNev[b][0], 1, hW[b], hWRes[b]); *max_err = err > *max_err ? err : *max_err; } } else { // both eigenvalues and eigenvectors needed; need to implicitly test // eigenvectors due to non-uniqueness of eigenvectors under scaling if(hInfoRes[b][0] == 0) { T alpha = 1; T beta = 0; // hARes contains eigenvectors x // compute B*x (or A*x) and store in hB cblas_symm_hemm(HIPSOLVER_SIDE_LEFT, uplo, n, hNev[b][0], alpha, B[b], ldb, hARes[b], lda, beta, hB[b], ldb); if(itype == HIPSOLVER_EIG_TYPE_1) { // problem is A*x = (lambda)*B*x // compute (1/lambda)*A*x and store in hA for(int j = 0; j < hNev[b][0]; j++) { alpha = T(1) / hWRes[b][j]; cblas_symv_hemv(uplo, n, alpha, A[b], lda, hARes[b] + j * lda, 1, beta, hA[b] + j * lda, 1); } // move B*x into hARes for(int i = 0; i < n; i++) for(int j = 0; j < hNev[b][0]; j++) hARes[b][i + j * lda] = hB[b][i + j * ldb]; } else { // problem is A*B*x = (lambda)*x or B*A*x = (lambda)*x // compute (1/lambda)*A*B*x or (1/lambda)*B*A*x and store in hA for(int j = 0; j < hNev[b][0]; j++) { alpha = T(1) / hWRes[b][j]; cblas_symv_hemv(uplo, n, alpha, A[b], lda, hB[b] + j * ldb, 1, beta, hA[b] + j * lda, 1); } } // error is ||hA - 
hARes|| / ||hA|| // using frobenius norm err = norm_error('F', n, hNev[b][0], lda, hA[b], hARes[b]); *max_err = err > *max_err ? err : *max_err; } } } } template void sygvdx_hegvdx_getPerfData(const hipsolverHandle_t handle, const hipsolverEigType_t itype, const hipsolverEigMode_t evect, const hipsolverEigRange_t erange, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, const S vl, const S vu, const int il, const int iu, Vh& hNevRes, Ud& dW, const int stW, Td& dWork, const int lwork, Vd& dInfo, const int bc, Th& hA, Th& hB, Vh& hNev, Uh& hW, Vh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { constexpr bool COMPLEX = is_complex; int size_work = (COMPLEX ? 2 * n : 8 * n); int size_rwork = (COMPLEX ? 7 * n : 0); int size_iwork = 5 * n; std::vector work(size_work); std::vector rwork(size_rwork); std::vector iwork(size_iwork); host_strided_batch_vector A(lda * n, 1, lda * n, bc); host_strided_batch_vector B(ldb * n, 1, ldb * n, bc); std::vector Z(n * n); std::vector ifail(n); S abstol = 2 * get_safemin(); if(!perf) { sygvdx_hegvdx_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) { cblas_sygvx_hegvx(itype, evect, erange, uplo, n, hA[b], lda, hB[b], ldb, vl, vu, il, iu, abstol, hNev[b], hW[b], Z.data(), n, work.data(), size_work, rwork.data(), iwork.data(), ifail.data(), hInfo[b]); } *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } sygvdx_hegvdx_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false); // cold calls for(int iter = 0; iter < 2; iter++) { sygvdx_hegvdx_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false); CHECK_ROCBLAS_ERROR(hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, dA.data(), lda, 
stA, dB.data(), ldb, stB, vl, vu, il, iu, hNevRes.data(), dW.data(), stW, dWork.data(), lwork, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { sygvdx_hegvdx_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false); start = get_time_us_sync(stream); hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, dA.data(), lda, stA, dB.data(), ldb, stB, vl, vu, il, iu, hNevRes.data(), dW.data(), stW, dWork.data(), lwork, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_sygvdx_hegvdx(Arguments& argus) { using S = decltype(std::real(T{})); // get arguments hipsolver_local_handle handle; char itypeC = argus.get("itype"); char evectC = argus.get("jobz"); char erangeC = argus.get("range"); char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); int ldb = argus.get("ldb", n); int stA = argus.get("strideA", lda * n); int stB = argus.get("strideB", ldb * n); int stW = argus.get("strideW", n); S vl = S(argus.get("vl", 0)); S vu = S(argus.get("vu", erangeC == 'V' ? 1 : 0)); int il = argus.get("il", erangeC == 'I' ? 1 : 0); int iu = argus.get("iu", erangeC == 'I' ? 1 : 0); hipsolverEigType_t itype = char2hipsolver_eform(itypeC); hipsolverEigMode_t evect = char2hipsolver_evect(evectC); hipsolverEigRange_t erange = char2hipsolver_erange(erangeC); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; int stARes = (argus.unit_check || argus.norm_check) ? stA : 0; int stWRes = (argus.unit_check || argus.norm_check) ? 
stW : 0; // determine sizes size_t size_A = size_t(lda) * n; size_t size_B = size_t(ldb) * n; size_t size_W = size_t(n); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; size_t size_WRes = (argus.unit_check || argus.norm_check) ? size_W : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || ldb < n || bc < 0 || (erange == HIPSOLVER_EIG_RANGE_V && vl >= vu) || (erange == HIPSOLVER_EIG_RANGE_I && (il < 1 || iu < 0)) || (erange == HIPSOLVER_EIG_RANGE_I && (iu > n || (n > 0 && il > iu)))); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_sygvdx_hegvdx(API, // handle, // itype, // evect, // erange, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (T* const*)nullptr, // ldb, // stB, // vl, // vu, // il, // iu, // (int*)nullptr, // (S*)nullptr, // stW, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_sygvdx_hegvdx(API, handle, itype, evect, erange, uplo, n, (T*)nullptr, lda, stA, (T*)nullptr, ldb, stB, vl, vu, il, iu, (int*)nullptr, (S*)nullptr, stW, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_Work; hipsolver_sygvdx_hegvdx_bufferSize(API, handle, itype, evect, erange, uplo, n, (T*)nullptr, lda, (T*)nullptr, ldb, vl, vu, il, iu, (int*)nullptr, (S*)nullptr, &size_Work); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_Work); return; } // memory allocations (all cases) // host host_strided_batch_vector hNev(1, 1, 1, bc); host_strided_batch_vector hNevRes(1, 1, 1, bc); host_strided_batch_vector hW(size_W, 1, stW, bc); host_strided_batch_vector hWRes(size_WRes, 1, stWRes, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device device_strided_batch_vector dW(size_W, 1, 
stW, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_Work, 1, size_Work, bc); if(size_W) CHECK_HIP_ERROR(dW.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_Work) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hARes(size_ARes, 1, bc); // host_batch_vector hB(size_B, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_batch_vector dB(size_B, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // if(size_B) // CHECK_HIP_ERROR(dB.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // sygvdx_hegvdx_getError(handle, // itype, // evect, // erange, // uplo, // n, // dA, // lda, // stA, // dB, // ldb, // stB, // vl, // vu, // il, // iu, // hNevRes, // dW, // stW, // dWork, // size_Work, // dInfo, // bc, // hA, // hARes, // hB, // hNev, // hW, // hWRes, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // sygvdx_hegvdx_getPerfData(handle, // itype, // evect, // erange, // uplo, // n, // dA, // lda, // stA, // dB, // ldb, // stB, // vl, // vu, // il, // iu, // hNevRes, // dW, // stW, // dWork, // size_Work, // dInfo, // bc, // hA, // hB, // hNev, // hW, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); host_strided_batch_vector hB(size_B, 1, stB, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dB(size_B, 1, stB, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_B) CHECK_HIP_ERROR(dB.memcheck()); // check computations if(argus.unit_check || argus.norm_check) sygvdx_hegvdx_getError(handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb, stB, vl, vu, il, iu, hNevRes, dW, stW, dWork, size_Work, dInfo, bc, hA, hARes, hB, hNev, hW, hWRes, hInfo, 
hInfoRes, &max_error); // collect performance data if(argus.timing) sygvdx_hegvdx_getPerfData(handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb, stB, vl, vu, il, iu, hNevRes, dW, stW, dWork, size_Work, dInfo, bc, hA, hB, hNev, hW, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using 2 * n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, 2 * n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output( "itype", "evect", "uplo", "n", "lda", "ldb", "strideW", "batch_c"); rocsolver_bench_output(itypeC, evectC, uploC, n, lda, ldb, stW, bc); } else if(STRIDED) { rocsolver_bench_output("itype", "evect", "uplo", "n", "lda", "ldb", "strideA", "strideB", "strideW", "batch_c"); rocsolver_bench_output(itypeC, evectC, uploC, n, lda, ldb, stA, stB, stW, bc); } else { rocsolver_bench_output("itype", "evect", "uplo", "n", "lda", "ldb"); rocsolver_bench_output(itypeC, evectC, uploC, n, lda, ldb); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_sygvj_hegvj.hpp000066400000000000000000001461271436107207300240700ustar00rootroot00000000000000/* 
************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp"
// Bad-argument checks for hipsolver_sygvj_hegvj.
//
// Each call below passes exactly one invalid argument and forwards every other argument
// unchanged from the caller, then checks the status the wrapper reports:
//   - a null handle                          -> HIPSOLVER_STATUS_NOT_INITIALIZED
//   - an out-of-range enum value (-1)        -> HIPSOLVER_STATUS_INVALID_ENUM
//   - a null pointer (AMD platform only)     -> HIPSOLVER_STATUS_INVALID_VALUE
//
// NOTE(review): the template parameter list looks stripped in this copy ("template void ...");
// API, T and U are presumably template parameters - confirm against the upstream header.
template void sygvj_hegvj_checkBadArgs(const hipsolverHandle_t handle,
                                       const hipsolverEigType_t itype,
                                       const hipsolverEigMode_t evect,
                                       const hipsolverFillMode_t uplo,
                                       const int n,
                                       T dA,
                                       const int lda,
                                       const int stA,
                                       T dB,
                                       const int ldb,
                                       const int stB,
                                       U dD,
                                       const int stD,
                                       T dWork,
                                       const int lwork,
                                       int* dInfo,
                                       hipsolverSyevjInfo_t params,
                                       const int bc)
{
    // handle: a null handle must be reported as "not initialized"
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvj_hegvj(API, nullptr, itype, evect, uplo, n, dA, lda, stA, dB, ldb, stB, dD,
                              stD, dWork, lwork, dInfo, params, bc),
        HIPSOLVER_STATUS_NOT_INITIALIZED);

    // values: each enum argument in turn is replaced by the invalid value -1
    // (itype, then evect, then uplo)
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvj_hegvj(API, handle, hipsolverEigType_t(-1), evect, uplo, n, dA, lda, stA,
                              dB, ldb, stB, dD, stD, dWork, lwork, dInfo, params, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvj_hegvj(API, handle, itype, hipsolverEigMode_t(-1), uplo, n, dA, lda, stA,
                              dB, ldb, stB, dD, stD, dWork, lwork, dInfo, params, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvj_hegvj(API, handle, itype, evect, hipsolverFillMode_t(-1), n, dA, lda, stA,
                              dB, ldb, stB, dD, stD, dWork, lwork, dInfo, params, bc),
        HIPSOLVER_STATUS_INVALID_ENUM);

#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
    // pointers: these checks are compiled only for the AMD platform; each replaces one pointer
    // argument by nullptr (dA, dB, dD, dInfo in that order)
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvj_hegvj(API, handle, itype, evect, uplo, n, (T) nullptr, lda, stA, dB, ldb,
                              stB, dD, stD, dWork, lwork, dInfo, params, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvj_hegvj(API, handle, itype, evect, uplo, n, dA, lda, stA, (T) nullptr, ldb,
                              stB, dD, stD, dWork, lwork, dInfo, params, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvj_hegvj(API, handle, itype, evect, uplo, n, dA, lda, stA, dB, ldb, stB,
                              (U) nullptr, stD, dWork, lwork, dInfo, params, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
    EXPECT_ROCBLAS_STATUS(
        hipsolver_sygvj_hegvj(API, handle, itype, evect, uplo, n, dA, lda, stA, dB, ldb, stB, dD,
                              stD, dWork, lwork, (int*)nullptr, params, bc),
        HIPSOLVER_STATUS_INVALID_VALUE);
#endif
}
template void testing_sygvj_hegvj_bad_arg() { using S = decltype(std::real(T{})); // safe arguments hipsolver_local_handle handle; hipsolver_local_syevj_info params; int n = 1; int lda = 1; int ldb = 1; int stA = 1; int stB = 1; int stD = 1; int bc = 1; hipsolverEigType_t itype = HIPSOLVER_EIG_TYPE_1; hipsolverEigMode_t evect = HIPSOLVER_EIG_MODE_NOVECTOR; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_batch_vector dB(1, 1, 1); // device_strided_batch_vector dD(1, 1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dB.memcheck()); // CHECK_HIP_ERROR(dD.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // int size_W; // hipsolver_sygvj_hegvj_bufferSize(API, // handle, // itype, // evect, // uplo, // n, // dA.data(), // lda, // dB.data(), // ldb, // dD.data(), // &size_W, // params); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // sygvj_hegvj_checkBadArgs(handle, // itype, // evect, // uplo, // n, // dA.data(), // lda, // stA, // dB.data(), // ldb, // stB, // dD.data(), // stD, // dWork.data(), // size_W, // dInfo.data(), // params, // bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dB(1, 1, 1, 1); device_strided_batch_vector dD(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dB.memcheck()); CHECK_HIP_ERROR(dD.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_sygvj_hegvj_bufferSize(API, handle, itype, evect, uplo, n, dA.data(), lda, dB.data(), ldb, dD.data(), &size_W, params); device_strided_batch_vector dWork(size_W, 1, size_W, bc);
if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments sygvj_hegvj_checkBadArgs(handle, itype, evect, uplo, n, dA.data(), lda, stA, dB.data(), ldb, stB, dD.data(), stD, dWork.data(), size_W, dInfo.data(), params, bc); } } template void sygvj_hegvj_initData(const hipsolverHandle_t handle, const hipsolverEigType_t itype, const hipsolverEigMode_t evect, const int n, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, const int bc, Th& hA, Th& hB, host_strided_batch_vector& A, host_strided_batch_vector& B, const bool test, const bool singular) { if(CPU) { rocblas_init(hA, true); rocblas_init(hB, false); for(int b = 0; b < bc; ++b) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) { if(i == j) { hA[b][i + j * lda] = std::real(hA[b][i + j * lda]) + 400; hB[b][i + j * ldb] = std::real(hB[b][i + j * ldb]) + 400; } else { hA[b][i + j * lda] -= 4; } } } // store A and B for testing purposes if(test && evect != HIPSOLVER_EIG_MODE_NOVECTOR) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) { if(itype != HIPSOLVER_EIG_TYPE_3) { A[b][i + j * lda] = hA[b][i + j * lda]; B[b][i + j * ldb] = hB[b][i + j * ldb]; } else { A[b][i + j * lda] = hB[b][i + j * ldb]; B[b][i + j * ldb] = hA[b][i + j * lda]; } } } } } } if(GPU) { // now copy data to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); CHECK_HIP_ERROR(dB.transfer_from(hB)); } } template void sygvj_hegvj_getError(const hipsolverHandle_t handle, const hipsolverEigType_t itype, const hipsolverEigMode_t evect, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, Sd& dD, const int stD, Td& dWork, const int lwork, Id& dInfo, hipsolverSyevjInfo_t params, const double abstol, const int max_sweeps, const int sort_eig, const int bc, Th& hA, Th& hARes, Th& hB, Sh& hD, Sh& hDRes, Ih& hInfo, Ih& hInfoRes, Uh& hResidualRes, Ih& hSweepsRes, double* max_err, const bool singular) { constexpr bool COMPLEX = is_complex; using S = 
decltype(std::real(T{})); int lrwork, ltwork; if(!COMPLEX) { lrwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 2 * n + 1 : 1 + 6 * n + 2 * n * n); ltwork = 0; } else { lrwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n : 1 + 5 * n + 2 * n * n); ltwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n + 1 : 2 * n + n * n); } int liwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 1 : 3 + 5 * n); std::vector work(ltwork); std::vector rwork(lrwork); std::vector iwork(liwork); host_strided_batch_vector A(lda * n, 1, lda * n, bc); host_strided_batch_vector B(ldb * n, 1, ldb * n, bc); // input data initialization sygvj_hegvj_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, true, singular); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_sygvj_hegvj(API, handle, itype, evect, uplo, n, dA.data(), lda, stA, dB.data(), ldb, stB, dD.data(), stD, dWork.data(), lwork, dInfo.data(), params, bc)); CHECK_HIP_ERROR(hDRes.transfer_from(dD)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); if(evect != HIPSOLVER_EIG_MODE_NOVECTOR) CHECK_HIP_ERROR(hARes.transfer_from(dA)); hipsolverXsyevjGetResidual(handle, params, hResidualRes.data()); hipsolverXsyevjGetSweeps(handle, params, hSweepsRes.data()); // CPU lapack for(int b = 0; b < bc; ++b) { cblas_sygvd_hegvd(itype, evect, uplo, n, hA[b], lda, hB[b], ldb, hD[b], work.data(), ltwork, rwork.data(), lrwork, iwork.data(), liwork, hInfo[b]); } // check info for non-convergence and/or positive-definiteness *max_err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) *max_err += 1; // Also check validity of residual for(rocblas_int b = 0; b < bc; ++b) if(hResidualRes[b][0] < 0) *max_err += 1; // Also check validity of sweeps for(rocblas_int b = 0; b < bc; ++b) if(hSweepsRes[b][0] < 0 || hSweepsRes[b][0] > max_sweeps) *max_err += 1; double err; if(evect == HIPSOLVER_EIG_MODE_NOVECTOR) { // only eigenvalues needed; can compare with LAPACK // error is ||hD - hDRes|| / ||hD|| // using 
frobenius norm for(int b = 0; b < bc; ++b) { if(hInfoRes[b][0] == 0 && sort_eig) { err = norm_error('F', 1, n, 1, hD[b], hDRes[b]); *max_err = err > *max_err ? err : *max_err; } } } else { // both eigenvalues and eigenvectors needed; need to implicitly test // eigenvectors due to non-uniqueness of eigenvectors under scaling for(int b = 0; b < bc; ++b) { if(hInfoRes[b][0] == 0) { T alpha = 1; T beta = 0; // hARes contains eigenvectors x // compute B*x (or A*x) and store in hB cblas_symm_hemm(HIPSOLVER_SIDE_LEFT, uplo, n, n, alpha, B[b], ldb, hARes[b], lda, beta, hB[b], ldb); if(itype == HIPSOLVER_EIG_TYPE_1) { // problem is A*x = (lambda)*B*x // compute (1/lambda)*A*x and store in hA for(int j = 0; j < n; j++) { alpha = T(1) / hDRes[b][j]; cblas_symv_hemv(uplo, n, alpha, A[b], lda, hARes[b] + j * lda, 1, beta, hA[b] + j * lda, 1); } // move B*x into hARes for(int i = 0; i < n; i++) for(int j = 0; j < n; j++) hARes[b][i + j * lda] = hB[b][i + j * ldb]; } else { // problem is A*B*x = (lambda)*x or B*A*x = (lambda)*x // compute (1/lambda)*A*B*x or (1/lambda)*B*A*x and store in hA for(int j = 0; j < n; j++) { alpha = T(1) / hDRes[b][j]; cblas_symv_hemv(uplo, n, alpha, A[b], lda, hB[b] + j * ldb, 1, beta, hA[b] + j * lda, 1); } } // error is ||hA - hARes|| / ||hA|| // using frobenius norm err = norm_error('F', n, n, lda, hA[b], hARes[b]); *max_err = err > *max_err ? 
err : *max_err; } } } } template void sygvj_hegvj_getPerfData(const hipsolverHandle_t handle, const hipsolverEigType_t itype, const hipsolverEigMode_t evect, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Td& dB, const int ldb, const int stB, Sd& dD, const int stD, Td& dWork, const int lwork, Id& dInfo, hipsolverSyevjInfo_t params, const int bc, Th& hA, Th& hB, Sh& hD, Ih& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf, const bool singular) { constexpr bool COMPLEX = is_complex; using S = decltype(std::real(T{})); int lrwork, ltwork; if(!COMPLEX) { lrwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 2 * n + 1 : 1 + 6 * n + 2 * n * n); ltwork = 0; } else { lrwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n : 1 + 5 * n + 2 * n * n); ltwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? n + 1 : 2 * n + n * n); } int liwork = (evect == HIPSOLVER_EIG_MODE_NOVECTOR ? 1 : 3 + 5 * n); std::vector work(ltwork); std::vector rwork(lrwork); std::vector iwork(liwork); host_strided_batch_vector A(1, 1, 1, 1); host_strided_batch_vector B(1, 1, 1, 1); if(!perf) { sygvj_hegvj_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false, singular); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) { cblas_sygvd_hegvd(itype, evect, uplo, n, hA[b], lda, hB[b], ldb, hD[b], work.data(), ltwork, rwork.data(), lrwork, iwork.data(), liwork, hInfo[b]); } *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } sygvj_hegvj_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false, singular); // cold calls for(int iter = 0; iter < 2; iter++) { sygvj_hegvj_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false, singular); CHECK_ROCBLAS_ERROR(hipsolver_sygvj_hegvj(API, handle, itype, evect, uplo, n, dA.data(), lda, stA, dB.data(), ldb, stB, dD.data(), stD, 
dWork.data(), lwork, dInfo.data(), params, bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { sygvj_hegvj_initData( handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, false, singular); start = get_time_us_sync(stream); hipsolver_sygvj_hegvj(API, handle, itype, evect, uplo, n, dA.data(), lda, stA, dB.data(), ldb, stB, dD.data(), stD, dWork.data(), lwork, dInfo.data(), params, bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_sygvj_hegvj(Arguments& argus) { using S = decltype(std::real(T{})); // get arguments hipsolver_local_handle handle; hipsolver_local_syevj_info params; char itypeC = argus.get("itype"); char evectC = argus.get("jobz"); char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); int ldb = argus.get("ldb", n); int stA = argus.get("strideA", lda * n); int stB = argus.get("strideB", ldb * n); int stD = argus.get("strideD", n); double abstol = argus.get("tolerance", 2 * get_epsilon()); rocblas_int max_sweeps = argus.get("max_sweeps", 100); rocblas_int sort_eig = argus.get("sort_eig", 1); hipsolverXsyevjSetTolerance(params, abstol); hipsolverXsyevjSetMaxSweeps(params, max_sweeps); hipsolverXsyevjSetSortEig(params, sort_eig); hipsolverEigType_t itype = char2hipsolver_eform(itypeC); hipsolverEigMode_t evect = char2hipsolver_evect(evectC); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; int stARes = (argus.unit_check || argus.norm_check) ? stA : 0; int stDRes = (argus.unit_check || argus.norm_check) ? stD : 0; // determine sizes size_t size_A = size_t(lda) * n; size_t size_B = size_t(ldb) * n; size_t size_D = size_t(n); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? 
size_A : 0; size_t size_DRes = (argus.unit_check || argus.norm_check) ? size_D : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || ldb < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_sygvj_hegvj(API, // handle, // itype, // evect, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (T* const*)nullptr, // ldb, // stB, // (S*)nullptr, // stD, // (T*)nullptr, // 0, // (int*)nullptr, // params, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_sygvj_hegvj(API, handle, itype, evect, uplo, n, (T*)nullptr, lda, stA, (T*)nullptr, ldb, stB, (S*)nullptr, stD, (T*)nullptr, 0, (int*)nullptr, params, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_sygvj_hegvj_bufferSize(API, handle, itype, evect, uplo, n, (T*)nullptr, lda, (T*)nullptr, ldb, (S*)nullptr, &size_W, params); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations // host host_strided_batch_vector hResidualRes(1, 1, 1, bc); host_strided_batch_vector hSweepsRes(1, 1, 1, bc); host_strided_batch_vector hD(size_D, 1, stD, bc); host_strided_batch_vector hDRes(size_DRes, 1, stDRes, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device device_strided_batch_vector dD(size_D, 1, stD, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_D) CHECK_HIP_ERROR(dD.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hARes(size_ARes, 1, bc); // host_batch_vector hB(size_B, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_batch_vector dB(size_B, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // 
if(size_B) // CHECK_HIP_ERROR(dB.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // sygvj_hegvj_getError(handle, // itype, // evect, // uplo, // n, // dA, // lda, // stA, // dB, // ldb, // stB, // dD, // stD, // dWork, // size_W, // dInfo, // params, // abstol, // max_sweeps, // sort_eig, // bc, // hA, // hARes, // hB, // hD, // hDRes, // hInfo, // hInfoRes, // hResidualRes, // hSweepsRes, // &max_error, // argus.singular); // // collect performance data // if(argus.timing) // sygvj_hegvj_getPerfData(handle, // itype, // evect, // uplo, // n, // dA, // lda, // stA, // dB, // ldb, // stB, // dD, // stD, // dWork, // size_W, // dInfo, // params, // bc, // hA, // hB, // hD, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf, // argus.singular); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); host_strided_batch_vector hB(size_B, 1, stB, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dB(size_B, 1, stB, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); if(size_B) CHECK_HIP_ERROR(dB.memcheck()); // check computations if(argus.unit_check || argus.norm_check) sygvj_hegvj_getError(handle, itype, evect, uplo, n, dA, lda, stA, dB, ldb, stB, dD, stD, dWork, size_W, dInfo, params, abstol, max_sweeps, sort_eig, bc, hA, hARes, hB, hD, hDRes, hInfo, hInfoRes, hResidualRes, hSweepsRes, &max_error, argus.singular); // collect performance data if(argus.timing) sygvj_hegvj_getPerfData(handle, itype, evect, uplo, n, dA, lda, stA, dB, ldb, stB, dD, stD, dWork, size_W, dInfo, params, bc, hA, hB, hD, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf, argus.singular); } // validate results for rocsolver-test // using 2 * n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, 2 * n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { 
std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("itype", "evect", "uplo", "n", "lda", "ldb", "strideD", "tolerance", "max_sweeps", "sort_eig", "batch_c"); rocsolver_bench_output( itypeC, evectC, uploC, n, lda, ldb, stD, abstol, max_sweeps, sort_eig, bc); } else if(STRIDED) { rocsolver_bench_output("itype", "evect", "uplo", "n", "lda", "ldb", "strideA", "strideB", "strideD", "tolerance", "max_sweeps", "sort_eig", "batch_c"); rocsolver_bench_output(itypeC, evectC, uploC, n, lda, ldb, stA, stB, stD, abstol, max_sweeps, sort_eig, bc); } else { rocsolver_bench_output("itype", "evect", "uplo", "n", "lda", "ldb", "tolerance", "max_sweeps", "sort_eig"); rocsolver_bench_output( itypeC, evectC, uploC, n, lda, ldb, abstol, max_sweeps, sort_eig); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_sytrd_hetrd.hpp000066400000000000000000001222521436107207300240670ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void sytrd_hetrd_checkBadArgs(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, T dA, const int lda, const int stA, S dD, const int stD, S dE, const int stE, U dTau, const int stP, U dWork, const int lwork, V dInfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, nullptr, uplo, n, dA, lda, stA, dD, stD, dE, stE, dTau, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, handle, hipsolverFillMode_t(-1), n, dA, lda, stA, dD, stD, dE, stE, dTau, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, (T) nullptr, lda, stA, dD, stD, dE, stE, dTau, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, dA, lda, stA, (S) nullptr, stD, dE, stE, dTau, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, dA, lda, stA, dD, stD, (S) nullptr, stE, dTau, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, dA, lda, stA, dD, stD, dE, stE, (U) nullptr, stP, dWork, lwork, dInfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, dA, lda, stA, dD, stD, dE, stE, dTau, stP, dWork, lwork, (V) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_sytrd_hetrd_bad_arg() { using S = decltype(std::real(T{})); // safe arguments hipsolver_local_handle handle; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; int n = 1; int lda = 1; int stA = 1; int stD = 1; 
int stE = 1; int stP = 1; int bc = 1; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dD(1, 1, 1, 1); // device_strided_batch_vector dE(1, 1, 1, 1); // device_strided_batch_vector dTau(1, 1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dD.memcheck()); // CHECK_HIP_ERROR(dE.memcheck()); // CHECK_HIP_ERROR(dTau.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // int size_W; // hipsolver_sytrd_hetrd_bufferSize( // FORTRAN, handle, uplo, n, dA.data(), lda, dD.data(), dE.data(), dTau.data(), &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // sytrd_hetrd_checkBadArgs(handle, // uplo, // n, // dA.data(), // lda, // stA, // dD.data(), // stD, // dE.data(), // stE, // dTau.data(), // stP, // dWork.data(), // size_W, // dInfo.data(), // bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dD(1, 1, 1, 1); device_strided_batch_vector dE(1, 1, 1, 1); device_strided_batch_vector dTau(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dD.memcheck()); CHECK_HIP_ERROR(dE.memcheck()); CHECK_HIP_ERROR(dTau.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_sytrd_hetrd_bufferSize( FORTRAN, handle, uplo, n, dA.data(), lda, dD.data(), dE.data(), dTau.data(), &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments sytrd_hetrd_checkBadArgs(handle, uplo, n, dA.data(), lda, stA, dD.data(), stD, dE.data(), stE, dTau.data(), stP, dWork.data(), size_W, dInfo.data(), bc); } } template , int> = 0> void sytrd_hetrd_initData( const hipsolverHandle_t handle, const int n, Td& dA, const int lda, const int bc, Th& hA) { if(CPU) { rocblas_init(hA, true); // scale A to 
avoid singularities for(int b = 0; b < bc; ++b) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) { if(i == j || i == j + 1 || i == j - 1) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } } if(GPU) { // now copy to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template , int> = 0> void sytrd_hetrd_initData( const hipsolverHandle_t handle, const int n, Td& dA, const int lda, const int bc, Th& hA) { if(CPU) { rocblas_init(hA, true); // scale A to avoid singularities for(int b = 0; b < bc; ++b) { for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] = hA[b][i + j * lda].real() + 400; else if(i == j + 1 || i == j - 1) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } } } if(GPU) { // now copy to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void sytrd_hetrd_getError(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Sd& dD, const int stD, Sd& dE, const int stE, Ud& dTau, const int stP, Ud& dWork, const int lwork, Vd& dInfo, const int bc, Th& hA, Th& hARes, Sh& hD, Sh& hE, Uh& hTau, Vh& hInfo, Vh& hInfoRes, double* max_err) { using S = decltype(std::real(T{})); constexpr bool COMPLEX = is_complex; std::vector hW(32 * n); // input data initialization sytrd_hetrd_initData(handle, n, dA, lda, bc, hA); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, dA.data(), lda, stA, dD.data(), stD, dE.data(), stE, dTau.data(), stP, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hTau.transfer_from(dTau)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // Reconstruct matrix A from the factorization for implicit testing // A = H(n-1)...H(2)H(1)*T*H(1)'H(2)'...H(n-1)' if upper // A = H(1)H(2)...H(n-1)*T*H(n-1)'...H(2)'H(1)' if lower std::vector v(n); for(int b = 0; b < bc; ++b) { T* a = hARes[b]; T* t = hTau[b]; if(uplo == 
HIPSOLVER_FILL_MODE_LOWER) { for(int i = 0; i < n - 2; ++i) a[i + (n - 1) * lda] = 0; a[(n - 2) + (n - 1) * lda] = a[(n - 1) + (n - 2) * lda]; // for each column for(int j = n - 2; j >= 0; --j) { // prepare T and v for(int i = 0; i < j - 1; ++i) a[i + j * lda] = 0; if(j > 0) a[(j - 1) + j * lda] = a[j + (j - 1) * lda]; for(int i = j + 2; i < n; ++i) { v[i - j - 1] = a[i + j * lda]; a[i + j * lda] = 0; } v[0] = 1; // apply householder reflector cblas_larf(HIPSOLVER_SIDE_LEFT, n - 1 - j, n - j, v.data(), 1, t + j, a + (j + 1) + j * lda, lda, hW.data()); if(COMPLEX) cblas_lacgv(1, t + j, 1); cblas_larf(HIPSOLVER_SIDE_RIGHT, n - j, n - 1 - j, v.data(), 1, t + j, a + j + (j + 1) * lda, lda, hW.data()); } } else { a[1] = a[lda]; for(int i = 2; i < n; ++i) a[i] = 0; // for each column for(int j = 1; j <= n - 1; ++j) { // prepare T and v for(int i = 0; i < j - 1; ++i) { v[i] = a[i + j * lda]; a[i + j * lda] = 0; } v[j - 1] = 1; if(j < n - 1) a[(j + 1) + j * lda] = a[j + (j + 1) * lda]; for(int i = j + 2; i < n; ++i) a[i + j * lda] = 0; // apply householder reflector cblas_larf( HIPSOLVER_SIDE_LEFT, j, j + 1, v.data(), 1, t + j - 1, a, lda, hW.data()); if(COMPLEX) cblas_lacgv(1, t + j - 1, 1); cblas_larf( HIPSOLVER_SIDE_RIGHT, j + 1, j, v.data(), 1, t + j - 1, a, lda, hW.data()); } } } // error is ||hA - hARes|| / ||hA|| // using frobenius norm double err; *max_err = 0; for(int b = 0; b < bc; ++b) { *max_err = (uplo == HIPSOLVER_FILL_MODE_LOWER) ? 
norm_error_lowerTr('F', n, n, lda, hA[b], hARes[b]) : norm_error_upperTr('F', n, n, lda, hA[b], hARes[b]); } // check info err = 0; for(int b = 0; b < bc; ++b) if(hInfoRes[b][0] != 0) err++; *max_err += err; } template void sytrd_hetrd_getPerfData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Sd& dD, const int stD, Sd& dE, const int stE, Ud& dTau, const int stP, Ud& dWork, const int lwork, Vd& dInfo, const int bc, Th& hA, Sh& hD, Sh& hE, Uh& hTau, Vh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { using S = decltype(std::real(T{})); std::vector hW(32 * n); if(!perf) { sytrd_hetrd_initData(handle, n, dA, lda, bc, hA); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_sytrd_hetrd(uplo, n, hA[b], lda, hD[b], hE[b], hTau[b], hW.data(), 32 * n); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } sytrd_hetrd_initData(handle, n, dA, lda, bc, hA); // cold calls for(int iter = 0; iter < 2; iter++) { sytrd_hetrd_initData(handle, n, dA, lda, bc, hA); CHECK_ROCBLAS_ERROR(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, dA.data(), lda, stA, dD.data(), stD, dE.data(), stE, dTau.data(), stP, dWork.data(), lwork, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { sytrd_hetrd_initData(handle, n, dA, lda, bc, hA); start = get_time_us_sync(stream); hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, dA.data(), lda, stA, dD.data(), stD, dE.data(), stE, dTau.data(), stP, dWork.data(), lwork, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_sytrd_hetrd(Arguments& argus) { using S = decltype(std::real(T{})); // get arguments hipsolver_local_handle handle; char uploC = argus.get("uplo"); 
int n = argus.get("n"); int lda = argus.get("lda", n); int stA = argus.get("strideA", lda * n); int stD = argus.get("strideD", n); int stE = argus.get("strideE", n - 1); int stP = argus.get("strideP", n - 1); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; int stARes = (argus.unit_check || argus.norm_check) ? stA : 0; // check non-supported values if(uplo != HIPSOLVER_FILL_MODE_UPPER && uplo != HIPSOLVER_FILL_MODE_LOWER) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, // handle, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (S*)nullptr, // stD, // (S*)nullptr, // stE, // (T*)nullptr, // stP, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, (T*)nullptr, lda, stA, (S*)nullptr, stD, (S*)nullptr, stE, (T*)nullptr, stP, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_args); return; } // determine sizes size_t size_A = lda * n; size_t size_D = n; size_t size_E = n - 1; size_t size_tau = n - 1; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? 
size_A : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, // handle, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (S*)nullptr, // stD, // (S*)nullptr, // stE, // (T*)nullptr, // stP, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_sytrd_hetrd(FORTRAN, handle, uplo, n, (T*)nullptr, lda, stA, (S*)nullptr, stD, (S*)nullptr, stE, (T*)nullptr, stP, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_sytrd_hetrd_bufferSize( FORTRAN, handle, uplo, n, (T*)nullptr, lda, (S*)nullptr, (S*)nullptr, (T*)nullptr, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } // memory allocations (all cases) // host host_strided_batch_vector hD(size_D, 1, stD, bc); host_strided_batch_vector hE(size_E, 1, stE, bc); host_strided_batch_vector hTau(size_tau, 1, stP, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device device_strided_batch_vector dD(size_D, 1, stD, bc); device_strided_batch_vector dE(size_E, 1, stE, bc); device_strided_batch_vector dTau(size_tau, 1, stP, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_D) CHECK_HIP_ERROR(dD.memcheck()); if(size_E) CHECK_HIP_ERROR(dE.memcheck()); if(size_tau) CHECK_HIP_ERROR(dTau.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hARes(size_ARes, 1, bc); // device_batch_vector dA(size_A, 1, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // // check computations // if(argus.unit_check || 
argus.norm_check) // sytrd_hetrd_getError(handle, // uplo, // n, // dA, // lda, // stA, // dD, // stD, // dE, // stE, // dTau, // stP, // dWork, // size_W, // dInfo, // bc, // hA, // hARes, // hD, // hE, // hTau, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // sytrd_hetrd_getPerfData(handle, // uplo, // n, // dA, // lda, // stA, // dD, // stD, // dE, // stE, // dTau, // stP, // dWork, // size_W, // dInfo, // bc, // hA, // hD, // hE, // hTau, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); // check computations if(argus.unit_check || argus.norm_check) sytrd_hetrd_getError(handle, uplo, n, dA, lda, stA, dD, stD, dE, stE, dTau, stP, dWork, size_W, dInfo, bc, hA, hARes, hD, hE, hTau, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) sytrd_hetrd_getPerfData(handle, uplo, n, dA, lda, stA, dD, stD, dE, stE, dTau, stP, dWork, size_W, dInfo, bc, hA, hD, hE, hTau, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output( "uplo", "n", "lda", "strideD", "strideE", "strideP", "batch_c"); rocsolver_bench_output(uploC, n, lda, stD, stE, stP, bc); } else if(STRIDED) { rocsolver_bench_output( "uplo", "n", "lda", "strideA", "strideD", "strideE", "strideP", "batch_c"); rocsolver_bench_output(uploC, n, lda, stA, stD, stE, stP, bc); } else { 
rocsolver_bench_output("uplo", "n", "lda"); rocsolver_bench_output(uploC, n, lda); } std::cerr << "\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/testing_sytrf.hpp000066400000000000000000000667561436107207300227230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "clientcommon.hpp" template void sytrf_checkBadArgs(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, T dA, const int lda, const int stA, U dIpiv, const int stP, V dWork, const int lwork, U dinfo, const int bc) { // handle EXPECT_ROCBLAS_STATUS( hipsolver_sytrf( FORTRAN, nullptr, uplo, n, dA, lda, stA, dIpiv, stP, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_NOT_INITIALIZED); // values EXPECT_ROCBLAS_STATUS(hipsolver_sytrf(FORTRAN, handle, hipsolverFillMode_t(-1), n, dA, lda, stA, dIpiv, stP, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_ENUM); #if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) // pointers EXPECT_ROCBLAS_STATUS( hipsolver_sytrf( FORTRAN, handle, uplo, n, (T) nullptr, lda, stA, dIpiv, stP, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_sytrf( FORTRAN, handle, uplo, n, dA, lda, stA, (U) nullptr, stP, dWork, lwork, dinfo, bc), HIPSOLVER_STATUS_INVALID_VALUE); EXPECT_ROCBLAS_STATUS( hipsolver_sytrf( FORTRAN, handle, uplo, n, dA, lda, stA, dIpiv, stP, dWork, lwork, (U) nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); #endif } template void testing_sytrf_bad_arg() { // safe arguments hipsolver_local_handle handle; hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_UPPER; int n = 1; int lda = 1; int stA = 1; int stP = 1; int bc = 1; if(BATCHED) { // // memory allocations // device_batch_vector dA(1, 1, 1); // device_strided_batch_vector dIpiv(1, 1, 1, 1); // device_strided_batch_vector dInfo(1, 1, 1, 1); // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dIpiv.memcheck()); // 
CHECK_HIP_ERROR(dInfo.memcheck()); // int size_W; // hipsolver_sytrf_bufferSize(FORTRAN, handle, n, dA.data(), lda, &size_W); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check bad arguments // sytrf_checkBadArgs(handle, // uplo, // n, // dA.data(), // lda, // stA, // dIpiv.data(), // stP, // dWork.data(), // size_W, // dInfo.data(), // bc); } else { // memory allocations device_strided_batch_vector dA(1, 1, 1, 1); device_strided_batch_vector dIpiv(1, 1, 1, 1); device_strided_batch_vector dInfo(1, 1, 1, 1); CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dIpiv.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); int size_W; hipsolver_sytrf_bufferSize(FORTRAN, handle, n, dA.data(), lda, &size_W); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check bad arguments sytrf_checkBadArgs(handle, uplo, n, dA.data(), lda, stA, dIpiv.data(), stP, dWork.data(), size_W, dInfo.data(), bc); } } template void sytrf_initData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Ud& dInfo, const int bc, Th& hA, Uh& hIpiv, Uh& hInfo) { if(CPU) { T tmp; rocblas_init(hA, true); for(int b = 0; b < bc; ++b) { // scale A to avoid singularities for(int i = 0; i < n; i++) { for(int j = 0; j < n; j++) { if(i == j) hA[b][i + j * lda] += 400; else hA[b][i + j * lda] -= 4; } } // shuffle rows to test pivoting // always the same permuation for debugging purposes for(int i = 0; i < n / 2; i++) { for(int j = 0; j < n; j++) { tmp = hA[b][i + j * lda]; hA[b][i + j * lda] = hA[b][n - 1 - i + j * lda]; hA[b][n - 1 - i + j * lda] = tmp; } } } } if(GPU) { // now copy data to the GPU CHECK_HIP_ERROR(dA.transfer_from(hA)); } } template void sytrf_getError(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, 
Vd& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Th& hARes, Uh& hIpiv, Uh& hIpivRes, Uh& hInfo, Uh& hInfoRes, double* max_err) { int size_W = 64 * n; std::vector hW(size_W); // input data initialization sytrf_initData( handle, uplo, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(hipsolver_sytrf(FORTRAN, handle, uplo, n, dA.data(), lda, stA, dIpiv.data(), stP, dWork.data(), lwork, dInfo.data(), bc)); CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hIpivRes.transfer_from(dIpiv)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); // CPU lapack for(int b = 0; b < bc; ++b) cblas_sytrf(uplo, n, hA[b], lda, hIpiv[b], hW.data(), size_W, hInfo[b]); // error is ||hA - hARes|| / ||hA|| // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. // IT MIGHT BE REVISITED IN THE FUTURE) // using frobenius norm double err; *max_err = 0; for(int b = 0; b < bc; ++b) { err = norm_error('F', n, n, lda, hA[b], hARes[b]); *max_err = err > *max_err ? err : *max_err; // also check pivoting (count the number of incorrect pivots) err = 0; for(int i = 0; i < n; ++i) if(hIpiv[b][i] != hIpivRes[b][i]) err++; *max_err = err > *max_err ? 
err : *max_err; } // also check info err = 0; for(int b = 0; b < bc; ++b) if(hInfo[b][0] != hInfoRes[b][0]) err++; *max_err += err; } template void sytrf_getPerfData(const hipsolverHandle_t handle, const hipsolverFillMode_t uplo, const int n, Td& dA, const int lda, const int stA, Ud& dIpiv, const int stP, Vd& dWork, const int lwork, Ud& dInfo, const int bc, Th& hA, Uh& hIpiv, Uh& hInfo, double* gpu_time_used, double* cpu_time_used, const int hot_calls, const bool perf) { int size_W = 64 * n; std::vector hW(size_W); if(!perf) { sytrf_initData( handle, uplo, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); // cpu-lapack performance (only if not in perf mode) *cpu_time_used = get_time_us_no_sync(); for(int b = 0; b < bc; ++b) cblas_sytrf(uplo, n, hA[b], lda, hIpiv[b], hW.data(), size_W, hInfo[b]); *cpu_time_used = get_time_us_no_sync() - *cpu_time_used; } sytrf_initData( handle, uplo, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); // cold calls for(int iter = 0; iter < 2; iter++) { sytrf_initData( handle, uplo, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); CHECK_ROCBLAS_ERROR(hipsolver_sytrf(FORTRAN, handle, uplo, n, dA.data(), lda, stA, dIpiv.data(), stP, dWork.data(), lwork, dInfo.data(), bc)); } // gpu-lapack performance hipStream_t stream; CHECK_ROCBLAS_ERROR(hipsolverGetStream(handle, &stream)); double start; for(int iter = 0; iter < hot_calls; iter++) { sytrf_initData( handle, uplo, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, hInfo); start = get_time_us_sync(stream); hipsolver_sytrf(FORTRAN, handle, uplo, n, dA.data(), lda, stA, dIpiv.data(), stP, dWork.data(), lwork, dInfo.data(), bc); *gpu_time_used += get_time_us_sync(stream) - start; } *gpu_time_used /= hot_calls; } template void testing_sytrf(Arguments& argus) { // get arguments hipsolver_local_handle handle; char uploC = argus.get("uplo"); int n = argus.get("n"); int lda = argus.get("lda", n); int stA = argus.get("strideA", lda * n); int stP = argus.get("strideP", 
n); hipsolverFillMode_t uplo = char2hipsolver_fill(uploC); int bc = argus.batch_count; int hot_calls = argus.iters; int stARes = (argus.unit_check || argus.norm_check) ? stA : 0; int stPRes = (argus.unit_check || argus.norm_check) ? stP : 0; // check non-supported values if(uplo != HIPSOLVER_FILL_MODE_UPPER && uplo != HIPSOLVER_FILL_MODE_LOWER) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_sytrf(FORTRAN, // handle, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (int*)nullptr, // stP, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_sytrf(FORTRAN, handle, uplo, n, (T*)nullptr, lda, stA, (int*)nullptr, stP, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_args); return; } // determine sizes size_t size_A = size_t(lda) * n; size_t size_P = size_t(n); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; size_t size_PRes = (argus.unit_check || argus.norm_check) ? 
size_P : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || bc < 0); if(invalid_size) { if(BATCHED) { // EXPECT_ROCBLAS_STATUS(hipsolver_sytrf(FORTRAN, // handle, // uplo, // n, // (T* const*)nullptr, // lda, // stA, // (int*)nullptr, // stP, // (T*)nullptr, // 0, // (int*)nullptr, // bc), // HIPSOLVER_STATUS_INVALID_VALUE); } else { EXPECT_ROCBLAS_STATUS(hipsolver_sytrf(FORTRAN, handle, uplo, n, (T*)nullptr, lda, stA, (int*)nullptr, stP, (T*)nullptr, 0, (int*)nullptr, bc), HIPSOLVER_STATUS_INVALID_VALUE); } if(argus.timing) rocsolver_bench_inform(inform_invalid_size); return; } // memory size query is necessary int size_W; hipsolver_sytrf_bufferSize(FORTRAN, handle, n, (T*)nullptr, lda, &size_W); if(argus.mem_query) { rocsolver_bench_inform(inform_mem_query, size_W); return; } if(BATCHED) { // // memory allocations // host_batch_vector hA(size_A, 1, bc); // host_batch_vector hARes(size_ARes, 1, bc); // host_strided_batch_vector hIpiv(size_P, 1, stP, bc); // host_strided_batch_vector hIpivRes(size_PRes, 1, stPRes, bc); // host_strided_batch_vector hInfo(1, 1, 1, bc); // host_strided_batch_vector hInfoRes(1, 1, 1, bc); // device_batch_vector dA(size_A, 1, bc); // device_strided_batch_vector dIpiv(size_P, 1, stP, bc); // device_strided_batch_vector dInfo(1, 1, 1, bc); // device_strided_batch_vector dWork(size_W, 1, size_W, bc); // if(size_A) // CHECK_HIP_ERROR(dA.memcheck()); // CHECK_HIP_ERROR(dInfo.memcheck()); // if(size_P) // CHECK_HIP_ERROR(dIpiv.memcheck()); // if(size_W) // CHECK_HIP_ERROR(dWork.memcheck()); // // check computations // if(argus.unit_check || argus.norm_check) // sytrf_getError(handle, // uplo, // n, // dA, // lda, // stA, // dIpiv, // stP, // dWork, // size_W, // dInfo, // bc, // hA, // hARes, // hIpiv, // hIpivRes, // hInfo, // hInfoRes, // &max_error); // // collect performance data // if(argus.timing) // sytrf_getPerfData(handle, // uplo, // n, // dA, // lda, // stA, // dIpiv, // stP, // dWork, // size_W, // dInfo, // bc, // 
hA, // hIpiv, // hInfo, // &gpu_time_used, // &cpu_time_used, // hot_calls, // argus.perf); } else { // memory allocations host_strided_batch_vector hA(size_A, 1, stA, bc); host_strided_batch_vector hARes(size_ARes, 1, stARes, bc); host_strided_batch_vector hIpiv(size_P, 1, stP, bc); host_strided_batch_vector hIpivRes(size_PRes, 1, stPRes, bc); host_strided_batch_vector hInfo(1, 1, 1, bc); host_strided_batch_vector hInfoRes(1, 1, 1, bc); device_strided_batch_vector dA(size_A, 1, stA, bc); device_strided_batch_vector dIpiv(size_P, 1, stP, bc); device_strided_batch_vector dInfo(1, 1, 1, bc); device_strided_batch_vector dWork(size_W, 1, size_W, bc); if(size_A) CHECK_HIP_ERROR(dA.memcheck()); CHECK_HIP_ERROR(dInfo.memcheck()); if(size_P) CHECK_HIP_ERROR(dIpiv.memcheck()); if(size_W) CHECK_HIP_ERROR(dWork.memcheck()); // check computations if(argus.unit_check || argus.norm_check) sytrf_getError(handle, uplo, n, dA, lda, stA, dIpiv, stP, dWork, size_W, dInfo, bc, hA, hARes, hIpiv, hIpivRes, hInfo, hInfoRes, &max_error); // collect performance data if(argus.timing) sytrf_getPerfData(handle, uplo, n, dA, lda, stA, dIpiv, stP, dWork, size_W, dInfo, bc, hA, hIpiv, hInfo, &gpu_time_used, &cpu_time_used, hot_calls, argus.perf); } // validate results for rocsolver-test // using n * machine_precision as tolerance if(argus.unit_check) ROCSOLVER_TEST_CHECK(T, max_error, n); // output results for rocsolver-bench if(argus.timing) { if(!argus.perf) { std::cerr << "\n============================================\n"; std::cerr << "Arguments:\n"; std::cerr << "============================================\n"; if(BATCHED) { rocsolver_bench_output("uplo", "n", "lda", "strideP", "batch_c"); rocsolver_bench_output(uploC, n, lda, stP, bc); } else if(STRIDED) { rocsolver_bench_output("uplo", "n", "lda", "strideA", "strideP", "batch_c"); rocsolver_bench_output(uploC, n, lda, stA, stP, bc); } else { rocsolver_bench_output("uplo", "n", "lda"); rocsolver_bench_output(uploC, n, lda); } std::cerr << 
"\n============================================\n"; std::cerr << "Results:\n"; std::cerr << "============================================\n"; if(argus.norm_check) { rocsolver_bench_output("cpu_time", "gpu_time", "error"); rocsolver_bench_output(cpu_time_used, gpu_time_used, max_error); } else { rocsolver_bench_output("cpu_time", "gpu_time"); rocsolver_bench_output(cpu_time_used, gpu_time_used); } std::cerr << std::endl; } else { if(argus.norm_check) rocsolver_bench_output(gpu_time_used, max_error); else rocsolver_bench_output(gpu_time_used); } } // ensure all arguments were consumed argus.validate_consumed(); } hipSOLVER-rocm-5.5.1/clients/include/utility.hpp000066400000000000000000000362671436107207300215140ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "hipsolver.h" #ifdef __cplusplus #include "complex.hpp" #include "hipsolver_datatype2string.hpp" #include #include #include #include #include #endif #include #include /*!\file * \brief provide data initialization, timing, hipsolver type <-> lapack char conversion utilities. */ #ifdef GOOGLE_TEST #include #define CHECK_HIP_ERROR(error) ASSERT_EQ(error, hipSuccess) inline void hipsolver_expect_status(hipsolverStatus_t status, hipsolverStatus_t expected) { if(status != HIPSOLVER_STATUS_NOT_SUPPORTED) ASSERT_EQ(status, expected); } #define EXPECT_ROCBLAS_STATUS(status, expected) hipsolver_expect_status(status, expected) #define CHECK_ROCBLAS_ERROR(status) hipsolver_expect_status(status, HIPSOLVER_STATUS_SUCCESS) #else #define CHECK_HIP_ERROR(error) \ do \ { \ if(error != hipSuccess) \ { \ fprintf(stderr, \ "error: '%s'(%d) at %s:%d\n", \ hipGetErrorString(error), \ error, \ __FILE__, \ __LINE__); \ exit(EXIT_FAILURE); \ } \ } while(0) inline void hipsolver_expect_status(hipsolverStatus_t status, hipsolverStatus_t expected) { if(status != expected && status != HIPSOLVER_STATUS_NOT_SUPPORTED) { fprintf(stderr, "hipSOLVER status error: Expected: %s, Actual: %s\n", hipsolver2string_status(expected), hipsolver2string_status(status)); if(expected == HIPSOLVER_STATUS_SUCCESS) exit(EXIT_FAILURE); } } #define EXPECT_ROCBLAS_STATUS(status, expected) hipsolver_expect_status(status, expected) #define CHECK_ROCBLAS_ERROR(status) hipsolver_expect_status(status, HIPSOLVER_STATUS_SUCCESS) #endif #ifdef __cplusplus /* ============================================================================================ */ /*! 
\brief local handle which is automatically created and destroyed */ class hipsolver_local_handle { hipsolverHandle_t m_handle; public: hipsolver_local_handle() { hipsolverCreate(&m_handle); } ~hipsolver_local_handle() { hipsolverDestroy(m_handle); } hipsolver_local_handle(const hipsolver_local_handle&) = delete; hipsolver_local_handle(hipsolver_local_handle&&) = delete; hipsolver_local_handle& operator=(const hipsolver_local_handle&) = delete; hipsolver_local_handle& operator=(hipsolver_local_handle&&) = delete; // Allow hipsolver_local_handle to be used anywhere hipsolverHandle_t is expected operator hipsolverHandle_t&() { return m_handle; } operator const hipsolverHandle_t&() const { return m_handle; } }; /* ============================================================================================ */ /*! \brief local gesvdj params which is automatically created and destroyed */ class hipsolver_local_gesvdj_info { hipsolverGesvdjInfo_t m_info; public: hipsolver_local_gesvdj_info() { hipsolverDnCreateGesvdjInfo(&m_info); } ~hipsolver_local_gesvdj_info() { hipsolverDnDestroyGesvdjInfo(m_info); } hipsolver_local_gesvdj_info(const hipsolver_local_gesvdj_info&) = delete; hipsolver_local_gesvdj_info(hipsolver_local_gesvdj_info&&) = delete; hipsolver_local_gesvdj_info& operator=(const hipsolver_local_gesvdj_info&) = delete; hipsolver_local_gesvdj_info& operator=(hipsolver_local_gesvdj_info&&) = delete; // Allow hipsolver_local_gesvdj_info to be used anywhere hipsolverGesvdjInfo_t is expected operator hipsolverGesvdjInfo_t&() { return m_info; } operator const hipsolverGesvdjInfo_t&() const { return m_info; } }; /* ============================================================================================ */ /*! 
\brief local syevj params which is automatically created and destroyed */ class hipsolver_local_syevj_info { hipsolverSyevjInfo_t m_info; public: hipsolver_local_syevj_info() { hipsolverDnCreateSyevjInfo(&m_info); } ~hipsolver_local_syevj_info() { hipsolverDnDestroySyevjInfo(m_info); } hipsolver_local_syevj_info(const hipsolver_local_syevj_info&) = delete; hipsolver_local_syevj_info(hipsolver_local_syevj_info&&) = delete; hipsolver_local_syevj_info& operator=(const hipsolver_local_syevj_info&) = delete; hipsolver_local_syevj_info& operator=(hipsolver_local_syevj_info&&) = delete; // Allow hipsolver_local_syevj_info to be used anywhere hipsolverSyevjInfo_t is expected operator hipsolverSyevjInfo_t&() { return m_info; } operator const hipsolverSyevjInfo_t&() const { return m_info; } }; /* ============================================================================================ */ // Return true if value is NaN template inline bool hipsolver_isnan(T) { return false; } inline bool hipsolver_isnan(double arg) { return std::isnan(arg); } inline bool hipsolver_isnan(float arg) { return std::isnan(arg); } inline bool hipsolver_isnan(hipsolverComplex arg) { return std::isnan(arg.real()) || std::isnan(arg.imag()); } inline bool hipsolver_isnan(hipsolverDoubleComplex arg) { return std::isnan(arg.real()) || std::isnan(arg.imag()); } /* =============================================================================================== */ /* Complex / real helpers. */ /* Workaround for clang bug: https://bugs.llvm.org/show_bug.cgi?id=35863 */ #if __clang__ #define HIPSOLVER_CLANG_STATIC static #else #define HIPSOLVER_CLANG_STATIC #endif template static constexpr bool is_complex = false; // cppcheck-suppress syntaxError template <> HIPSOLVER_CLANG_STATIC constexpr bool is_complex = true; template <> HIPSOLVER_CLANG_STATIC constexpr bool is_complex = true; // Get base types from complex types. 
template struct real_t_impl { using type = T; }; template struct real_t_impl>> { using type = decltype(T{}.real()); }; template using real_t = typename real_t_impl::type; /* ============================================================================================ */ /*! \brief Random number generator which generates NaN values */ using hipsolver_rng_t = std::mt19937; extern hipsolver_rng_t hipsolver_rng, hipsolver_seed; // Reset the seed (mainly to ensure repeatability of failures in a given suite) inline void hipsolver_seedrand() { hipsolver_rng = hipsolver_seed; } class hipsolver_nan_rng { // Generate random NaN values template static T random_nan_data() { static_assert(sizeof(UINT_T) == sizeof(T), "Type sizes do not match"); union u_t { u_t() {} UINT_T u; T fp; } x; do x.u = std::uniform_int_distribution{}(hipsolver_rng); while(!(x.u & (((UINT_T)1 << SIG) - 1))); // Reject Inf (mantissa == 0) x.u |= (((UINT_T)1 << EXP) - 1) << SIG; // Exponent = all 1's return x.fp; // NaN with random bits } public: // Random integer template {}, int>::type = 0> explicit operator T() const { return std::uniform_int_distribution{}(hipsolver_rng); } // Random NaN float explicit operator float() const { return random_nan_data(); } // Random NaN double explicit operator double() const { return random_nan_data(); } // Random NaN Complex explicit operator hipsolverComplex() const { return {float(*this), float(*this)}; } // Random NaN Double Complex explicit operator hipsolverDoubleComplex() const { return {double(*this), double(*this)}; } }; /* ============================================================================================ */ /* generate random number :*/ /*! \brief generate a random number in range [1,2,3,4,5,6,7,8,9,10] */ template T random_generator() { // return rand()/( (T)RAND_MAX + 1); return T(rand() % 10 + 1); }; // for hipsolverComplex, generate 2 floats /*! 
\brief generate two random numbers in range [1,2,3,4,5,6,7,8,9,10] */ template <> inline hipsolverComplex random_generator() { return hipsolverComplex(rand() % 10 + 1, rand() % 10 + 1); } // for hipsolverDoubleComplex, generate 2 doubles /*! \brief generate two random numbers in range [1,2,3,4,5,6,7,8,9,10] */ template <> inline hipsolverDoubleComplex random_generator() { return hipsolverDoubleComplex(rand() % 10 + 1, rand() % 10 + 1); } /*! \brief generate a random number in range [-1,-2,-3,-4,-5,-6,-7,-8,-9,-10] */ template inline T random_generator_negative() { // return rand()/( (T)RAND_MAX + 1); return -T(rand() % 10 + 1); }; // for complex, generate two values, convert both to negative /*! \brief generate a random real value in range [-1, -10] and random * imaginary value in range [-1, -10] */ template <> inline hipsolverComplex random_generator_negative() { return {float(-(rand() % 10 + 1)), float(-(rand() % 10 + 1))}; } template <> inline hipsolverDoubleComplex random_generator_negative() { return {double(-(rand() % 10 + 1)), double(-(rand() % 10 + 1))}; } /* ============================================================================================ */ /* ============================================================================================ */ /*! 
\brief Packs strided_batched matricies into groups of 4 in N */ template void hipsolver_packInt8( std::vector& A, size_t M, size_t N, size_t lda, size_t batch_count = 1, size_t stride_a = 0) { std::vector temp(A); for(size_t b = 0; b < batch_count; b++) for(size_t colBase = 0; colBase < N; colBase += 4) for(size_t row = 0; row < lda; row++) for(size_t colOffset = 0; colOffset < 4; colOffset++) A[(colBase * lda + 4 * row) + colOffset + (stride_a * b)] = temp[(colBase + colOffset) * lda + row + (stride_a * b)]; } /* ============================================================================================ */ /* ============================================================================================ */ /*! \brief turn float -> 's', double -> 'd', hipsolverComplex -> 'c', hipsolverDoubleComplex * -> 'z' */ template char type2char(); /* ============================================================================================ */ /*! \brief turn float -> int, double -> int, hipsolverComplex.real() -> int, * hipsolverDoubleComplex.real() -> int */ template int type2int(T val); /* ============================================================================================ */ /*! \brief Debugging purpose, print out CPU and GPU result matrix, not valid in complex number */ template , int> = 0> void print_matrix(T* CPU_result, T* GPU_result, int m, int n, int lda) { for(int i = 0; i < m; i++) for(int j = 0; j < n; j++) printf("matrix col %d, row %d, CPU result=%.8g, GPU result=%.8g\n", j, i, double(CPU_result[i + j * lda]), double(GPU_result[i + j * lda])); } /*! 
\brief Debugging purpose, print out CPU and GPU result matrix, valid for complex number */ template , int> = 0> void print_matrix(T* CPU_result, T* GPU_result, int m, int n, int lda) { for(int i = 0; i < m; i++) for(int j = 0; j < n; j++) printf("matrix col %d, row %d, CPU result=(%.8g,%.8g), GPU result=(%.8g,%.8g)\n", j, i, double(CPU_result[i + j * lda].real()), double(CPU_result[i + j * lda].imag()), double(GPU_result[i + j * lda].real()), double(GPU_result[i + j * lda].imag())); } /* =============================================================================================== */ /* ============================================================================================ */ #endif // __cplusplus #ifdef __cplusplus extern "C" { #endif /* ============================================================================================ */ /* device query and print out their ID and name */ int query_device_property(); /* set current device to device_id */ void set_device(int device_id); /* ============================================================================================ */ /* timing: HIP only provides very limited timers function clock() and not general; hipsolver sync CPU and device and use more accurate CPU timer*/ /*! \brief CPU Timer(in microsecond): synchronize with the default device and return wall time */ double get_time_us(); /*! \brief CPU Timer(in microsecond): synchronize with given queue/stream and return wall time */ double get_time_us_sync(hipStream_t stream); /*! 
\brief CPU Timer(in microsecond): no GPU synchronization and return wall time */ double get_time_us_no_sync(); #ifdef __cplusplus } #endif /* ============================================================================================ */ hipSOLVER-rocm-5.5.1/clients/rocblascommon/000077500000000000000000000000001436107207300204755ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/rocblascommon/d_vector.hpp000066400000000000000000000111011436107207300230050ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "hipsolver.h" #include "rocblas_init.hpp" //#include "rocblas_test.hpp" #include #include using rocblas_int = int; using rocblas_stride = ptrdiff_t; /* ============================================================================================ */ /*! \brief base-class to allocate/deallocate device memory */ template class d_vector { private: size_t size, bytes; public: inline size_t nmemb() const noexcept { return size; } #ifdef GOOGLE_TEST U guard[PAD]; d_vector(size_t s) : size(s) , bytes((s + PAD * 2) * sizeof(T)) { // Initialize guard with random data if(PAD > 0) { rocblas_init_nan(guard, PAD); } } #else d_vector(size_t s) : size(s) , bytes(s ? s * sizeof(T) : sizeof(T)) { } #endif T* device_vector_setup() { T* d; if((hipMalloc)(&d, bytes) != hipSuccess) { static char* lc = setlocale(LC_NUMERIC, ""); fprintf(stderr, "Error allocating %'zu bytes (%zu GB)\n", bytes, bytes >> 30); d = nullptr; } #ifdef GOOGLE_TEST else { if(PAD > 0) { // Copy guard to device memory before allocated memory hipMemcpy(d, guard, sizeof(guard), hipMemcpyHostToDevice); // Point to allocated block d += PAD; // Copy guard to device memory after allocated memory hipMemcpy(d + size, guard, sizeof(guard), hipMemcpyHostToDevice); } } #endif return d; } void device_vector_check(T* d) { #ifdef GOOGLE_TEST if(PAD > 0) { U host[PAD]; // Copy device memory after allocated memory to host hipMemcpy(host, d + this->size, sizeof(guard), hipMemcpyDeviceToHost); // Make sure no corruption has occurred EXPECT_EQ(memcmp(host, guard, sizeof(guard)), 0); // Point to guard before allocated memory d -= PAD; // Copy device memory after allocated memory to host hipMemcpy(host, d, sizeof(guard), hipMemcpyDeviceToHost); // Make sure no corruption has occurred EXPECT_EQ(memcmp(host, guard, sizeof(guard)), 0); } #endif } void device_vector_teardown(T* d) { if(d != nullptr) { #ifdef GOOGLE_TEST if(PAD > 0) { U 
host[PAD]; // Copy device memory after allocated memory to host hipMemcpy(host, d + this->size, sizeof(guard), hipMemcpyDeviceToHost); // Make sure no corruption has occurred EXPECT_EQ(memcmp(host, guard, sizeof(guard)), 0); // Point to guard before allocated memory d -= PAD; // Copy device memory after allocated memory to host hipMemcpy(host, d, sizeof(guard), hipMemcpyDeviceToHost); // Make sure no corruption has occurred EXPECT_EQ(memcmp(host, guard, sizeof(guard)), 0); } #endif // Free device memory CHECK_HIP_ERROR((hipFree)(d)); } } }; hipSOLVER-rocm-5.5.1/clients/rocblascommon/device_batch_vector.hpp000066400000000000000000000211521436107207300251710ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "d_vector.hpp" // // Local declaration of the host strided batch vector. // template class host_batch_vector; //! //! @brief pseudo-vector subclass which uses a batch of device memory pointers //! and //! - an array of pointers in host memory //! - an array of pointers in device memory //! template class device_batch_vector : private d_vector { public: using value_type = T; public: //! //! @brief Disallow copying. //! device_batch_vector(const device_batch_vector&) = delete; //! //! @brief Disallow assigning. //! device_batch_vector& operator=(const device_batch_vector&) = delete; //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param batch_count The batch count. //! explicit device_batch_vector(rocblas_int n, rocblas_int inc, rocblas_int batch_count) : m_n(n) , m_inc(inc) , m_batch_count(batch_count) , d_vector(size_t(n) * std::abs(inc)) { if(false == this->try_initialize_memory()) { this->free_memory(); } } //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param stride (UNUSED) The stride. //! @param batch_count The batch count. //! explicit device_batch_vector(rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count) : device_batch_vector(n, inc, batch_count) { } //! //! @brief Constructor (kept for backward compatibility only, to be removed). //! @param batch_count The number of vectors. //! @param size_vector The size of each vectors. //! explicit device_batch_vector(rocblas_int batch_count, size_t size_vector) : device_batch_vector(size_vector, 1, batch_count) { } //! //! @brief Destructor. //! ~device_batch_vector() { this->free_memory(); } //! //! @brief Returns the length of the vector. //! rocblas_int n() const { return this->m_n; } //! //! @brief Returns the increment of the vector. //! 
rocblas_int inc() const { return this->m_inc; } //! //! @brief Returns the value of batch_count. //! rocblas_int batch_count() const { return this->m_batch_count; } //! //! @brief Returns the stride value. //! rocblas_stride stride() const { return 0; } //! //! @brief Access to device data. //! @return Pointer to the device data. //! T** ptr_on_device() { return this->m_device_data; } //! //! @brief Const access to device data. //! @return Const pointer to the device data. //! const T* const* ptr_on_device() const { return this->m_device_data; } T** data() { return this->m_device_data; } const T* const* data() const { return this->m_device_data; } //! //! @brief Random access. //! @param batch_index The batch index. //! @return Pointer to the array on device. //! T* operator[](rocblas_int batch_index) { return this->m_data[batch_index]; } //! //! @brief Constant random access. //! @param batch_index The batch index. //! @return Constant pointer to the array on device. //! const T* operator[](rocblas_int batch_index) const { return this->m_data[batch_index]; } // clang-format off //! //! @brief Const cast of the data on host. //! operator const T* const*() const { return this->m_data; } //! //! @brief Cast of the data on host. //! operator T**() { return this->m_data; } // clang-format on //! //! @brief Tell whether ressources allocation failed. //! explicit operator bool() const { return nullptr != this->m_data; } //! //! @brief Copy from a host batched vector. //! @param that The host_batch_vector to copy. //! hipError_t transfer_from(const host_batch_vector& that) { hipError_t hip_err; // // Copy each vector. // for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { if(hipSuccess != (hip_err = hipMemcpy((*this)[batch_index], that[batch_index], sizeof(T) * this->nmemb(), hipMemcpyHostToDevice))) { return hip_err; } } return hipSuccess; } //! //! @brief Check if memory exists. //! 
@return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. //! hipError_t memcheck() const { if(*this) return hipSuccess; else return hipErrorOutOfMemory; } private: rocblas_int m_n{}; rocblas_int m_inc{}; rocblas_int m_batch_count{}; T** m_data{}; T** m_device_data{}; //! //! @brief Try to allocate the ressources. //! @return true if success false otherwise. //! bool try_initialize_memory() { bool success = false; success = (hipSuccess == (hipMalloc)(&this->m_device_data, this->m_batch_count * sizeof(T*))); if(success) { success = (nullptr != (this->m_data = (T**)calloc(this->m_batch_count, sizeof(T*)))); if(success) { for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { success = (nullptr != (this->m_data[batch_index] = this->device_vector_setup())); if(!success) { break; } } if(success) { success = (hipSuccess == hipMemcpy(this->m_device_data, this->m_data, sizeof(T*) * this->m_batch_count, hipMemcpyHostToDevice)); } } } return success; } //! //! @brief Free the ressources, as much as we can. //! void free_memory() { if(nullptr != this->m_data) { for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { if(nullptr != this->m_data[batch_index]) { this->device_vector_teardown(this->m_data[batch_index]); this->m_data[batch_index] = nullptr; } } free(this->m_data); this->m_data = nullptr; } if(nullptr != this->m_device_data) { auto tmp_device_data = this->m_device_data; this->m_device_data = nullptr; CHECK_HIP_ERROR((hipFree)(tmp_device_data)); } } }; hipSOLVER-rocm-5.5.1/clients/rocblascommon/device_strided_batch_vector.hpp000066400000000000000000000157401436107207300267150ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once // // Local declaration of the host strided batch vector. // template class host_strided_batch_vector; //! //! @brief Implementation of a strided batched vector on device. //! template class device_strided_batch_vector : public d_vector { public: using value_type = T; public: //! //! @brief The storage type to use. //! typedef enum class estorage { block, interleave, } storage; //! //! @brief Disallow copying. //! device_strided_batch_vector(const device_strided_batch_vector&) = delete; //! //! @brief Disallow assigning. //! device_strided_batch_vector& operator=(const device_strided_batch_vector&) = delete; //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param stride The stride. //! @param batch_count The batch count. //! @param stg The storage format to use. //! 
explicit device_strided_batch_vector(rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count, storage stg = storage::block) : d_vector(calculate_nmemb(n, inc, stride, batch_count, stg)) , m_storage(stg) , m_n(n) , m_inc(inc) , m_stride(stride) , m_batch_count(batch_count) { bool valid_parameters = true; switch(this->m_storage) { case storage::block: { if(std::abs(this->m_stride) < this->m_n * std::abs(this->m_inc)) { valid_parameters = false; } break; } case storage::interleave: { if(std::abs(this->m_inc) < std::abs(this->m_stride) * this->m_batch_count) { valid_parameters = false; } break; } } if(valid_parameters) { this->m_data = this->device_vector_setup(); } } //! //! @brief Destructor. //! ~device_strided_batch_vector() { if(nullptr != this->m_data) { this->device_vector_teardown(this->m_data); this->m_data = nullptr; } } //! //! @brief Returns the data pointer. //! T* data() { return this->m_data; } //! //! @brief Returns the data pointer. //! const T* data() const { return this->m_data; } //! //! @brief Returns the length. //! rocblas_int n() const { return this->m_n; } //! //! @brief Returns the increment. //! rocblas_int inc() const { return this->m_inc; } //! //! @brief Returns the batch count. //! rocblas_int batch_count() const { return this->m_batch_count; } //! //! @brief Returns the stride value. //! rocblas_stride stride() const { return this->m_stride; } //! //! @brief Returns pointer. //! @param batch_index The batch index. //! @return A mutable pointer to the batch_index'th vector. //! T* operator[](rocblas_int batch_index) { return (this->m_stride >= 0) ? this->m_data + batch_index * this->m_stride : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; } //! //! @brief Returns non-mutable pointer. //! @param batch_index The batch index. //! @return A non-mutable mutable pointer to the batch_index'th vector. //! const T* operator[](rocblas_int batch_index) const { return (this->m_stride >= 0) ? 
this->m_data + batch_index * this->m_stride : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; } //! //! @brief Cast operator. //! @remark Returns the pointer of the first vector. //! operator T*() { return (*this)[0]; } //! //! @brief Non-mutable cast operator. //! @remark Returns the non-mutable pointer of the first vector. //! operator const T*() const { return (*this)[0]; } //! //! @brief Tell whether ressources allocation failed. //! explicit operator bool() const { return nullptr != this->m_data; } //! //! @brief Transfer data from a strided batched vector on device. //! @param that That strided batched vector on device. //! @return The hip error. //! hipError_t transfer_from(const host_strided_batch_vector& that) { return hipMemcpy( this->data(), that.data(), sizeof(T) * this->nmemb(), hipMemcpyHostToDevice); } //! //! @brief Check if memory exists. //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. //! hipError_t memcheck() const { if(*this) return hipSuccess; else return hipErrorOutOfMemory; } private: storage m_storage{storage::block}; rocblas_int m_n{}; rocblas_int m_inc{}; rocblas_stride m_stride{}; rocblas_int m_batch_count{}; T* m_data{}; static size_t calculate_nmemb( rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count, storage st) { switch(st) { case storage::block: return size_t(std::abs(stride)) * batch_count; case storage::interleave: return size_t(n) * std::abs(inc); } return 0; } }; hipSOLVER-rocm-5.5.1/clients/rocblascommon/device_vector.hpp000066400000000000000000000110761436107207300240340ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include "d_vector.hpp" // // Local declaration of the host vector. // template class host_vector; //! //! @brief pseudo-vector subclass which uses device memory //! template class device_vector : private d_vector { public: using value_type = T; public: //! //! @brief Disallow copying. //! device_vector(const device_vector&) = delete; //! //! @brief Disallow assigning //! device_vector& operator=(const device_vector&) = delete; //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @remark Must wrap constructor and destructor in functions to allow Google //! Test macros to work //! explicit device_vector(rocblas_int n, rocblas_int inc) : d_vector(n * std::abs(inc)) , m_n(n) , m_inc(inc) { this->m_data = this->device_vector_setup(); } //! //! 
@brief Constructor (kept for backward compatibility) //! @param s the size. //! @remark Must wrap constructor and destructor in functions to allow Google //! Test macros to work //! explicit device_vector(size_t s) : d_vector(s) , m_n(s) , m_inc(1) { this->m_data = this->device_vector_setup(); } //! //! @brief Destructor. //! ~device_vector() { this->device_vector_teardown(this->m_data); this->m_data = nullptr; } //! //! @brief Returns the length of the vector. //! rocblas_int n() const { return this->m_n; } //! //! @brief Returns the increment of the vector. //! rocblas_int inc() const { return this->m_inc; } //! //! @brief Returns the batch count (always 1). //! rocblas_int batch_count() const { return 1; } //! //! @brief Returns the stride (out of context, always 0) //! rocblas_stride stride() const { return 0; } //! //! @brief Returns the data pointer. //! T* data() { return this->m_data; } //! //! @brief Returns the data pointer. //! const T* data() const { return this->m_data; } //! //! @brief Decay into pointer wherever pointer is expected. //! operator T*() { return this->m_data; } //! //! @brief Decay into constant pointer wherever pointer is expected. //! operator const T*() const { return this->m_data; } //! //! @brief Tell whether malloc failed. //! explicit operator bool() const { return nullptr != this->m_data; } //! //! @brief Transfer data from a host vector. //! @param that The host vector. //! @return the hip error. //! 
hipError_t transfer_from(const host_vector& that) { return hipMemcpy( this->m_data, (const T*)that, this->nmemb() * sizeof(T), hipMemcpyHostToDevice); } hipError_t memcheck() const { if(*this) return hipSuccess; else return hipErrorOutOfMemory; } private: size_t m_size{}; rocblas_int m_n{}; rocblas_int m_inc{}; T* m_data{}; }; hipSOLVER-rocm-5.5.1/clients/rocblascommon/host_batch_vector.hpp000066400000000000000000000160711436107207300247130ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once // // Local declaration of the device batch vector. // template class device_batch_vector; //! //! @brief Implementation of the batch vector on host. //! 
template class host_batch_vector { public: using value_type = T; public: //! //! @brief Delete copy constructor. //! host_batch_vector(const host_batch_vector& that) = delete; //! //! @brief Delete copy assignement. //! host_batch_vector& operator=(const host_batch_vector& that) = delete; //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param batch_count The batch count. //! explicit host_batch_vector(rocblas_int n, rocblas_int inc, rocblas_int batch_count) : m_n(n) , m_inc(inc) , m_batch_count(batch_count) { if(false == this->try_initialize_memory()) { this->free_memory(); } } //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param stride (UNUSED) The stride. //! @param batch_count The batch count. //! explicit host_batch_vector(rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count) : host_batch_vector(n, inc, batch_count) { } //! //! @brief Destructor. //! ~host_batch_vector() { this->free_memory(); } //! //! @brief Returns the length of the vector. //! rocblas_int n() const { return this->m_n; } //! //! @brief Returns the increment of the vector. //! rocblas_int inc() const { return this->m_inc; } //! //! @brief Returns the batch count. //! rocblas_int batch_count() const { return this->m_batch_count; } //! //! @brief Returns the stride value. //! rocblas_stride stride() const { return 0; } //! //! @brief Random access to the vectors. //! @param batch_index the batch index. //! @return The mutable pointer. //! T* operator[](rocblas_int batch_index) { return this->m_data[batch_index]; } //! //! @brief Constant random access to the vectors. //! @param batch_index the batch index. //! @return The non-mutable pointer. //! const T* operator[](rocblas_int batch_index) const { return this->m_data[batch_index]; } // clang-format off //! //! @brief Cast to a double pointer. //! operator T**() { return this->m_data; } //! //! 
@brief Constant cast to a double pointer. //! operator const T* const*() { return this->m_data; } // clang-format on //! //! @brief Copy from a host batched vector. //! @param that the vector the data is copied from. //! @return true if the copy is done successfully, false otherwise. //! bool copy_from(const host_batch_vector& that) { if((this->batch_count() == that.batch_count()) && (this->n() == that.n()) && (this->inc() == that.inc())) { size_t num_bytes = this->n() * std::abs(this->inc()) * sizeof(T); for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { memcpy((*this)[batch_index], that[batch_index], num_bytes); } return true; } else { return false; } } //! //! @brief Transfer from a device batched vector. //! @param that the vector the data is copied from. //! @return the hip error. //! hipError_t transfer_from(const device_batch_vector& that) { hipError_t hip_err; size_t num_bytes = size_t(this->m_n) * std::abs(this->m_inc) * sizeof(T); for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { if(hipSuccess != (hip_err = hipMemcpy( (*this)[batch_index], that[batch_index], num_bytes, hipMemcpyDeviceToHost))) { return hip_err; } } return hipSuccess; } //! //! @brief Check if memory exists. //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. //! hipError_t memcheck() const { return (nullptr != this->m_data) ? 
hipSuccess : hipErrorOutOfMemory; } private: rocblas_int m_n{}; rocblas_int m_inc{}; rocblas_int m_batch_count{}; T** m_data{}; bool try_initialize_memory() { bool success = (nullptr != (this->m_data = (T**)calloc(this->m_batch_count, sizeof(T*)))); if(success) { size_t nmemb = size_t(this->m_n) * std::abs(this->m_inc); for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { success = (nullptr != (this->m_data[batch_index] = (T*)calloc(nmemb, sizeof(T)))); if(false == success) { break; } } } return success; } void free_memory() { if(nullptr != this->m_data) { for(rocblas_int batch_index = 0; batch_index < this->m_batch_count; ++batch_index) { if(nullptr != this->m_data[batch_index]) { free(this->m_data[batch_index]); this->m_data[batch_index] = nullptr; } } free(this->m_data); this->m_data = nullptr; } } }; hipSOLVER-rocm-5.5.1/clients/rocblascommon/host_strided_batch_vector.hpp000066400000000000000000000172101436107207300264250ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once // // Local declaration of the device strided batch vector. // template class device_strided_batch_vector; //! //! @brief Implementation of a host strided batched vector. //! template class host_strided_batch_vector { public: using value_type = T; public: //! //! @brief The storage type to use. //! typedef enum class estorage { block, interleave } storage; //! //! @brief Disallow copying. //! host_strided_batch_vector(const host_strided_batch_vector&) = delete; //! //! @brief Disallow assigning. //! host_strided_batch_vector& operator=(const host_strided_batch_vector&) = delete; //! //! @brief Constructor. //! @param n The length of the vector. //! @param inc The increment. //! @param stride The stride. //! @param batch_count The batch count. //! @param stg The storage format to use. //! explicit host_strided_batch_vector(rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count, storage stg = storage::block) : m_storage(stg) , m_n(n) , m_inc(inc) , m_stride(stride) , m_batch_count(batch_count) , m_nmemb(calculate_nmemb(n, inc, stride, batch_count, stg)) { bool valid_parameters = this->m_nmemb > 0; if(valid_parameters) { switch(this->m_storage) { case storage::block: { if(std::abs(this->m_stride) < this->m_n * std::abs(this->m_inc)) { valid_parameters = false; } break; } case storage::interleave: { if(std::abs(this->m_inc) < std::abs(this->m_stride) * this->m_batch_count) { valid_parameters = false; } break; } } if(valid_parameters) { this->m_data = new T[this->m_nmemb]; } } } //! //! @brief Destructor. //! 
~host_strided_batch_vector() { if(nullptr != this->m_data) { delete[] this->m_data; this->m_data = nullptr; } } //! //! @brief Returns the data pointer. //! T* data() { return this->m_data; } //! //! @brief Returns the data pointer. //! const T* data() const { return this->m_data; } //! //! @brief Returns the length. //! rocblas_int n() const { return this->m_n; } //! //! @brief Returns the increment. //! rocblas_int inc() const { return this->m_inc; } //! //! @brief Returns the batch count. //! rocblas_int batch_count() const { return this->m_batch_count; } //! //! @brief Returns the stride. //! rocblas_stride stride() const { return this->m_stride; } //! //! @brief Returns pointer. //! @param batch_index The batch index. //! @return A mutable pointer to the batch_index'th vector. //! T* operator[](rocblas_int batch_index) { return (this->m_stride >= 0) ? this->m_data + this->m_stride * batch_index : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; } //! //! @brief Returns non-mutable pointer. //! @param batch_index The batch index. //! @return A non-mutable mutable pointer to the batch_index'th vector. //! const T* operator[](rocblas_int batch_index) const { return (this->m_stride >= 0) ? this->m_data + this->m_stride * batch_index : this->m_data + (batch_index + 1 - this->m_batch_count) * this->m_stride; } //! //! @brief Cast operator. //! @remark Returns the pointer of the first vector. //! operator T*() { return (*this)[0]; } //! //! @brief Non-mutable cast operator. //! @remark Returns the non-mutable pointer of the first vector. //! operator const T*() const { return (*this)[0]; } //! //! @brief Tell whether ressources allocation failed. //! explicit operator bool() const { return nullptr != this->m_data; } //! //! @brief Copy data from a strided batched vector on host. //! @param that That strided batched vector on host. //! @return true if successful, false otherwise. //! 
bool copy_from(const host_strided_batch_vector& that) { if(that.n() == this->m_n && that.inc() == this->m_inc && that.stride() == this->m_stride && that.batch_count() == this->m_batch_count) { memcpy(this->data(), that.data(), sizeof(T) * this->m_nmemb); return true; } else { return false; } } //! //! @brief Transfer data from a strided batched vector on device. //! @param that That strided batched vector on device. //! @return The hip error. //! template hipError_t transfer_from(const device_strided_batch_vector& that) { return hipMemcpy( this->m_data, that.data(), sizeof(T) * this->m_nmemb, hipMemcpyDeviceToHost); } //! //! @brief Check if memory exists. //! @return hipSuccess if memory exists, hipErrorOutOfMemory otherwise. //! hipError_t memcheck() const { return ((bool)*this) ? hipSuccess : hipErrorOutOfMemory; } private: storage m_storage{storage::block}; rocblas_int m_n{}; rocblas_int m_inc{}; rocblas_stride m_stride{}; rocblas_int m_batch_count{}; size_t m_nmemb{}; T* m_data{}; static size_t calculate_nmemb( rocblas_int n, rocblas_int inc, rocblas_stride stride, rocblas_int batch_count, storage st) { switch(st) { case storage::block: return size_t(std::abs(stride)) * batch_count; case storage::interleave: return size_t(n) * std::abs(inc); } return 0; } }; hipSOLVER-rocm-5.5.1/clients/rocblascommon/host_vector.hpp000066400000000000000000000067041436107207300235540ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 * ************************************************************************ */

#pragma once

#include #include #include

//!
//! @brief Pseudo-vector subclass which uses host memory.
//!
//! A std::vector that additionally remembers a length n() and an increment
//! inc(), converts implicitly to a raw pointer, and offers the same
//! batch_count()/stride()/memcheck() queries as the other vector wrappers.
//!
template struct host_vector : std::vector
{
    // Inherit constructors.
    // Note: objects built through an inherited std::vector constructor keep
    // the default member values below, so n() and inc() return 0 for them.
    using std::vector::vector;

    //!
    //! @brief Constructor.
    //! @param n   The length; the storage holds n * |inc| elements.
    //! @param inc The increment.
    //!
    host_vector(size_t n, ptrdiff_t inc)
        : std::vector(n * std::abs(inc))
        , m_n(n)
        , m_inc(inc)
    {
    }

    //!
    //! @brief Copy constructor from host_vector of other types convertible to T
    //! @param x The source vector; copied element by element through
    //!          assignment. The result has length x.size() and increment 1.
    //!
    template {}, int> = 0> host_vector(const host_vector& x)
        : std::vector(x.size())
        , m_n(x.size())
        , m_inc(1)
    {
        for(size_t i = 0; i < m_n; ++i)
            (*this)[i] = x[i];
    }

    //!
    //! @brief Decay into pointer wherever pointer is expected
    //!
    operator T*()
    {
        return this->data();
    }

    //!
    //! @brief Decay into constant pointer wherever constant pointer is expected
    //!
    operator const T*() const
    {
        return this->data();
    }

    //!
    //! @brief Transfer from a device vector.
    //! @param that That device vector.
    //! @return the hip error.
    //! @remark Copies size() elements of this vector; both operands reach
    //!         hipMemcpy through their pointer conversions.
    //!
    hipError_t transfer_from(const device_vector& that)
    {
        return hipMemcpy(*this, that, sizeof(T) * this->size(), hipMemcpyDeviceToHost);
    }

    //!
    //! @brief Returns the length of the vector.
    //!
    size_t n() const
    {
        return m_n;
    }

    //!
    //! @brief Returns the increment of the vector.
    //!
    ptrdiff_t inc() const
    {
        return m_inc;
    }

    //!
    //! @brief Returns the batch count (always 1).
    //!
    static constexpr rocblas_int batch_count()
    {
        return 1;
    }

    //!
    //! @brief Returns the stride (out of context, always 0)
    //!
    static constexpr rocblas_stride stride()
    {
        return 0;
    }

    //!
    //! @brief Check if memory exists (out of context, always hipSuccess)
    //!
    static constexpr hipError_t memcheck()
    {
        return hipSuccess;
    }

private:
    size_t    m_n   = 0; // length reported by n()
    ptrdiff_t m_inc = 0; // increment reported by inc()
};
hipSOLVER-rocm-5.5.1/clients/rocblascommon/program_options.hpp000066400000000000000000000427531436107207300244430ustar00rootroot00000000000000
/* ************************************************************************
 * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ // This emulates the required functionality of boost::program_options #pragma once #include #include #include #include #include #include #include #include #include #include #include namespace roc { // Regular expression for token delimiters (whitespace and commas) static const std::regex program_options_regex{"[, \\f\\n\\r\\t\\v]+", std::regex_constants::optimize}; // Polymorphic base class to use with dynamic_cast class value_base { protected: bool m_has_actual = false; bool m_has_default = false; public: virtual ~value_base() = default; bool has_actual() const { return m_has_actual; } bool has_default() const { return m_has_default; } }; // Value parameters template class value : public value_base { T m_var; // Variable to be modified if no pointer provided T* m_var_ptr; // Pointer to variable to be modified public: // Constructor explicit value() : m_var_ptr(nullptr) { } explicit value(const T& var, bool defaulted) : m_var(var) , m_var_ptr(nullptr) { m_has_actual = !defaulted; m_has_default = defaulted; } explicit value(T* var_ptr) : m_var_ptr(var_ptr) { } // Allows actual_value() and default_value() value* operator->() { return this; } // Get the value const T& get_value() const { if(m_var_ptr) return *m_var_ptr; else return m_var; } // Set actual value value& actual_value(T val) { if(m_var_ptr) *m_var_ptr = std::move(val); else m_var = std::move(val); m_has_actual = true; return *this; } // Set default value value& default_value(T val) { if(!m_has_actual) { if(m_var_ptr) *m_var_ptr = std::move(val); else m_var = std::move(val); m_has_default = true; } return *this; } }; // bool_switch is a value, which is handled 
specially using bool_switch = value; class variable_value { std::shared_ptr m_val; public: // Constructor explicit variable_value() = default; template explicit variable_value(const T& xv, bool xdefaulted) : m_val(std::make_shared>(xv, xdefaulted)) { } explicit variable_value(std::shared_ptr val) : m_val(val) { } // Member functions bool empty() const { return !m_val.get() || (!m_val->has_actual() && !m_val->has_default()); } bool defaulted() const { return m_val.get() && !m_val->has_actual() && m_val->has_default(); } template const T& as() const { if(value* val = dynamic_cast*>(m_val.get())) return val->get_value(); else throw std::logic_error("Internal error: Invalid cast"); } }; using variables_map = std::map; class options_description { // desc_option describes a particular option class desc_option { std::string m_opts; std::shared_ptr m_val; std::string m_desc; public: // Constructor with options, value and description template desc_option(std::string opts, value val, std::string desc) : m_opts(std::move(opts)) , m_val(new auto(std::move(val))) , m_desc(std::move(desc)) { } // Constructor with options and description desc_option(std::string opts, std::string desc) : m_opts(std::move(opts)) , m_val(nullptr) , m_desc(std::move(desc)) { } // Copy constructor is deleted desc_option(const desc_option&) = delete; // Move constructor desc_option(desc_option&& other) = default; // Accessors const std::string& get_opts() const { return m_opts; } const std::shared_ptr get_val() const { return m_val; } const std::string& get_desc() const { return m_desc; } // Set a value void set_val(int& argc, char**& argv, const std::string& inopt) const { // We test all supported types with dynamic_cast and parse accordingly bool match = false; if(auto* ptr = dynamic_cast*>(m_val.get())) { int32_t val; match = argc && sscanf(*argv, "%" SCNd32, &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { uint32_t val; match = argc && sscanf(*argv, "%" 
SCNu32, &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { int64_t val; match = argc && sscanf(*argv, "%" SCNd64, &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { uint64_t val; match = argc && sscanf(*argv, "%" SCNu64, &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { float val; match = argc && sscanf(*argv, "%f", &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { double val; match = argc && sscanf(*argv, "%lf", &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { char val; match = argc && sscanf(*argv, " %c", &val) == 1; ptr->actual_value(val); } else if(auto* ptr = dynamic_cast*>(m_val.get())) { // We handle bool specially, setting the value to true without argument ptr->actual_value(true); return; } else if(auto* ptr = dynamic_cast*>(m_val.get())) { if(argc) { ptr->actual_value(*argv); match = true; } } else { throw std::logic_error("Internal error: Unsupported data type"); } if(!match) throw std::invalid_argument(argc ? "Invalid value for " + inopt : "Missing required value for " + inopt); // Skip past the argument's value ++argv; --argc; } }; // Description and option list std::string m_desc; std::vector m_optlist; // desc_optionlist allows chains of options to be parenthesized class desc_optionlist { std::vector& m_list; public: explicit desc_optionlist(std::vector& list) : m_list(list) { } template desc_optionlist operator()(Ts&&... 
arg) { m_list.push_back(desc_option(std::forward(arg)...)); return *this; } }; // Parse an option at the current (argc, argv) position void parse_option(int& argc, char**& argv, variables_map& vm, bool ignoreUnknown) const { // Iterate across all options for(const auto& opt : m_optlist) { // Canonical name used for map std::string canonical_name; // Iterate across tokens in the opts for(std::sregex_token_iterator tok{ opt.get_opts().begin(), opt.get_opts().end(), program_options_regex, -1}; tok != std::sregex_token_iterator(); ++tok) { // The first option in a list of options is the canonical name if(!canonical_name.length()) canonical_name = tok->str(); // If the length of the option is 1, it is single-dash; otherwise double-dash const char* prefix = tok->length() == 1 ? "-" : "--"; // If option matches if(*argv == prefix + tok->str()) { ++argv; --argc; // If option has a value, set it if(opt.get_val().get()) opt.set_val(argc, argv, prefix + tok->str()); // Add seen options to map vm[canonical_name] = variable_value(opt.get_val()); return; // Return successfully } } } // No options were matched if(ignoreUnknown) { ++argv; --argc; } else throw std::invalid_argument("Option " + std::string(argv[0]) + " is not defined."); } public: // Constructor explicit options_description(std::string desc) : m_desc(std::move(desc)) { } // Start a desc_optionlist chain desc_optionlist add_options() & { return desc_optionlist(m_optlist); } // Parse all options void parse_options(int& argc, char**& argv, variables_map& vm, bool ignoreUnknown = false) const { // Add options with default values to map for(const auto& opt : m_optlist) { std::sregex_token_iterator tok{ opt.get_opts().begin(), opt.get_opts().end(), program_options_regex, -1}; // Canonical name used for map std::string canonical_name = tok->str(); if(opt.get_val().get() && opt.get_val()->has_default()) vm[canonical_name] = variable_value(opt.get_val()); } // Parse options while(argc) parse_option(argc, argv, vm, 
ignoreUnknown); } // Formatted output of command-line arguments description friend std::ostream& operator<<(std::ostream& os, const options_description& d) { // Iterate across all options for(const auto& opt : d.m_optlist) { bool first = true, printvalue = true; const char* delim = ""; std::ostringstream left; // Iterate across tokens in the opts for(std::sregex_token_iterator tok{opt.get_opts().begin(), opt.get_opts().end(), program_options_regex, -1}; tok != std::sregex_token_iterator(); ++tok, first = false, delim = " ") { // If the length of the option is 1, it is single-dash; otherwise double-dash const char* prefix = tok->length() == 1 ? "-" : "--"; left << delim << (first ? "" : "|") << prefix << tok->str(); if(tok->str() == "help" || tok->str() == "h") printvalue = false; } if(printvalue) left << " "; os << std::setw(26) << std::left << left.str() << " " << opt.get_desc() << " "; left.str(std::string()); // Print the default value of the variable type if it exists // We do not print the default value for bool const value_base* val = opt.get_val().get(); if(val && !dynamic_cast*>(val)) { if(val->has_default()) { // We test all supported types with dynamic_cast and print accordingly left << " (Default value is: "; if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else if(dynamic_cast*>(val)) left << dynamic_cast*>(val)->get_value(); else throw std::logic_error("Internal error: Unsupported data type"); left << ")"; } } os << left.str() << "\n\n"; } return os << std::flush; } }; // Class representing command line parser class 
parse_command_line { variables_map m_vm; public: parse_command_line(int argc, char** argv, const options_description& desc, bool ignoreUnknown = false) { ++argv; // Skip argv[0] --argc; desc.parse_options(argc, argv, m_vm, ignoreUnknown); } // Copy the variables_map friend void store(const parse_command_line& p, variables_map& vm) { vm = p.m_vm; } // Move the variables_map friend void store(parse_command_line&& p, variables_map& vm) { vm = std::move(p.m_vm); } }; // We can define the notify() function as a no-op for our purposes inline void notify(const variables_map&) {} } hipSOLVER-rocm-5.5.1/clients/rocblascommon/rocblas_init.hpp000066400000000000000000000253321436107207300236630ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "hipsolver.h" //#include "rocblas_ostream.hpp" // #include "rocblas_math.hpp" // #include "rocblas_random.hpp" #include "../include/utility.hpp" #include #include #include /* ============================================================================================ */ /*! \brief matrix/vector initialization: */ // for vector x (M=1, N=lengthX, lda=incx); // for complex number, the real/imag part would be initialized with the same // value // Initialize vector with random values template void rocblas_init( std::vector& A, size_t M, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) A[i + j * lda + i_batch * stride] = random_generator(); } // Initialize vector with random values template inline void rocblas_init(T* A, size_t M, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) A[i + j * lda + i_batch * stride] = random_generator(); } template void rocblas_init_sin( std::vector& A, size_t M, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) A[i + j * lda + i_batch * stride] = sin(i + j * lda + i_batch * stride); } // Initialize matrix so adjacent entries have alternating sign. // In gemm if either A or B are initialized with alernating // sign the reduction sum will be summing positive // and negative numbers, so it should not get too large. // This helps reduce floating point inaccuracies for 16bit // arithmetic where the exponent has only 5 bits, and the // mantissa 10 bits. 
template void rocblas_init_alternating_sign( std::vector& A, size_t M, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) { auto value = random_generator(); A[i + j * lda + i_batch * stride] = (i ^ j) & 1 ? value : negate(value); } } template void rocblas_init_alternating_sign( T* A, size_t M, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) { auto value = random_generator(); A[i + j * lda + i_batch * stride] = (i ^ j) & 1 ? value : negate(value); } } template void rocblas_init_cos( std::vector& A, size_t M, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) A[i + j * lda + i_batch * stride] = cos(i + j * lda + i_batch * stride); } /*! \brief symmetric matrix initialization: */ // for real matrix only template void rocblas_init_symmetric(std::vector& A, size_t N, size_t lda) { for(size_t i = 0; i < N; ++i) for(size_t j = 0; j <= i; ++j) { auto value = random_generator(); // Warning: It's undefined behavior to assign to the // same array element twice in same sequence point (i==j) A[j + i * lda] = value; A[i + j * lda] = value; } } /*! \brief symmetric matrix initialization: */ template void rocblas_init_symmetric(T* A, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t b = 0; b < batch_count; ++b) { for(size_t i = 0; i < N; ++i) for(size_t j = 0; j <= i; ++j) { auto value = random_generator(); // Warning: It's undefined behavior to assign to the // same array element twice in same sequence point (i==j) A[b * stride + j + i * lda] = value; A[b * stride + i + j * lda] = value; } } } /*! 
\brief symmetric matrix clear: */ template void rocblas_clear_symmetric( hipsolverFillMode_t uplo, T* A, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t b = 0; b < batch_count; ++b) { for(size_t i = 0; i < N; ++i) for(size_t j = i + 1; j < N; ++j) { if(uplo == HIPSOLVER_FILL_MODE_UPPER) A[b * stride + j + i * lda] = 0; // clear lower else A[b * stride + i + j * lda] = 0; // clear upper } } } /*! \brief hermitian matrix initialization: */ // for complex matrix only, the real/imag part would be initialized with the // same value except the diagonal elment must be real template void rocblas_init_hermitian(std::vector& A, size_t N, size_t lda) { for(size_t i = 0; i < N; ++i) for(size_t j = 0; j <= i; ++j) { auto value = random_generator(); A[j + i * lda] = value; value.y = (i == j) ? 0 : negate(value.y); A[i + j * lda] = value; } } // // Initialize vector with HPL-like random values // template // void rocblas_init_hpl(std::vector& A, // size_t M, // size_t N, // size_t lda, // size_t stride = 0, // size_t batch_count = 1) // { // for(size_t i_batch = 0; i_batch < batch_count; i_batch++) // for(size_t i = 0; i < M; ++i) // for(size_t j = 0; j < N; ++j) // A[i + j * lda + i_batch * stride] = random_hpl_generator(); // } /* ============================================================================================ */ /*! \brief Initialize an array with random data, with NaN where appropriate */ template void rocblas_init_nan(T* A, size_t N) { for(size_t i = 0; i < N; ++i) A[i] = T(hipsolver_nan_rng()); } template void rocblas_init_nan( std::vector& A, size_t M, size_t N, size_t lda, size_t stride = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) A[i + j * lda + i_batch * stride] = T(hipsolver_nan_rng()); } /* ============================================================================================ */ /*! 
\brief Packs strided_batched matricies into groups of 4 in N */ template void rocblas_packInt8( std::vector& A, size_t M, size_t N, size_t batch_count, size_t lda, size_t stride_a) { // if(N % 4 != 0) // rocblas_cerr << "ERROR: dimension must be a multiple of 4 in order to pack" << std::endl; std::vector temp(A); for(size_t count = 0; count < batch_count; count++) for(size_t colBase = 0; colBase < N; colBase += 4) for(size_t row = 0; row < lda; row++) for(size_t colOffset = 0; colOffset < 4; colOffset++) A[(colBase * lda + 4 * row) + colOffset + (stride_a * count)] = temp[(colBase + colOffset) * lda + row + (stride_a * count)]; } /* ============================================================================================ */ /*! \brief Packs matricies into groups of 4 in N */ template void rocblas_packInt8(std::vector& A, size_t M, size_t N, size_t lda) { /* Assumes original matrix provided in column major order, where N is a multiple of 4 ---------- N ---------- | | 00 05 10 15 20 25 30 35 |00 05 10 15|20 25 30 35| | | 01 06 11 16 21 26 31 36 |01 06 11 16|21 26 31 36| l M 02 07 12 17 22 27 32 37 --> |02 07 12 17|22 27 32 37| d | 03 08 13 18 23 28 33 38 |03 08 13 18|23 28 33 38| a | 04 09 14 19 24 29 34 39 |04 09 14 19|24 29 34 39| | ** ** ** ** ** ** ** ** |** ** ** **|** ** ** **| | ** ** ** ** ** ** ** ** |** ** ** **|** ** ** **| Input : 00 01 02 03 04 ** ** 05 ... 38 39 ** ** Output: 00 05 10 15 01 06 11 16 ... ** ** ** ** */ // call general code with batch_count = 1 and stride_a = 0 rocblas_packInt8(A, M, N, 1, lda, 0); } /* ============================================================================================ */ /*! 
\brief matrix matrix initialization: copies from A into same position in B */ template void rocblas_copy_matrix(const T* A, T* B, size_t M, size_t N, size_t lda, size_t ldb, size_t stridea = 0, size_t strideb = 0, size_t batch_count = 1) { for(size_t i_batch = 0; i_batch < batch_count; i_batch++) for(size_t i = 0; i < M; ++i) for(size_t j = 0; j < N; ++j) B[i + j * ldb + i_batch * strideb] = A[i + j * lda + i_batch * stridea]; } hipSOLVER-rocm-5.5.1/clients/rocblascommon/rocblas_vector.hpp000066400000000000000000000126151436107207300242220ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "d_vector.hpp" #include "device_batch_vector.hpp" #include "device_strided_batch_vector.hpp" #include "device_vector.hpp" #include "host_batch_vector.hpp" // #include "host_pinned_vector.hpp" #include "host_strided_batch_vector.hpp" #include "host_vector.hpp" //! //! @brief Random number with type deductions. //! template void random_generator(T& n) { n = random_generator(); } //! //! //! template void random_nan_generator(T& n) { n = T(hipsolver_nan_rng()); } //! //! @brief Template for initializing a host //! (non_batched|batched|strided_batched)vector. //! @param that That vector. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template void rocblas_init_template(U& that, bool seedReset = false) { if(seedReset) { hipsolver_seedrand(); } for(rocblas_int batch_index = 0; batch_index < that.batch_count(); ++batch_index) { auto batched_data = that[batch_index]; auto inc = std::abs(that.inc()); auto n = that.n(); if(inc < 0) { batched_data -= (n - 1) * inc; } for(rocblas_int i = 0; i < n; ++i) { random_generator(batched_data[i * inc]); } } } //! //! @brief Template for initializing a host //! (non_batched|batched|strided_batched)vector with NaNs. //! @param that That vector. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template void rocblas_init_nan_template(U& that, bool seedReset = false) { if(seedReset) { hipsolver_seedrand(); } for(rocblas_int batch_index = 0; batch_index < that.batch_count(); ++batch_index) { auto batched_data = that[batch_index]; auto inc = std::abs(that.inc()); auto n = that.n(); if(inc < 0) { batched_data -= (n - 1) * inc; } for(rocblas_int i = 0; i < n; ++i) { random_nan_generator(batched_data[i * inc]); } } } //! //! @brief Initialize a host_strided_batch_vector. //! @param that The host strided batch vector. //! 
@param seedReset reset the seed if true, do not reset the seed otherwise. //! template void rocblas_init(host_strided_batch_vector& that, bool seedReset = false) { rocblas_init_template(that, seedReset); } //! //! @brief Initialize a host_batch_vector. //! @param that The host batch vector. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template void rocblas_init(host_batch_vector& that, bool seedReset = false) { rocblas_init_template(that, seedReset); } //! //! @brief Initialize a host_vector. //! @param that The host vector. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template void rocblas_init(host_vector& that, bool seedReset = false) { if(seedReset) { hipsolver_seedrand(); } rocblas_init(that, 1, that.size(), 1); } //! //! @brief Initialize a host_strided_batch_vector with NaNs. //! @param that The host strided batch vector to be initialized. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template void rocblas_init_nan(host_strided_batch_vector& that, bool seedReset = false) { rocblas_init_nan_template(that, seedReset); } //! //! @brief Initialize a host_strided_batch_vector with NaNs. //! @param that The host strided batch vector to be initialized. //! @param seedReset reset the seed if true, do not reset the seed otherwise. //! template void rocblas_init_nan(host_batch_vector& that, bool seedReset = false) { rocblas_init_nan_template(that, seedReset); } //! //! @brief Initialize a host_strided_batch_vector with NaNs. //! @param that The host strided batch vector to be initialized. //! @param seedReset reset he seed if true, do not reset the seed otherwise. //! 
template void rocblas_init_nan(host_vector& that, bool seedReset = false) { rocblas_init_nan_template(that, seedReset); } hipSOLVER-rocm-5.5.1/clients/rocsolvercommon/000077500000000000000000000000001436107207300210665ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/rocsolvercommon/norm.hpp000066400000000000000000000166741436107207300225700ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once // #include "clientcommon.hpp" // #include "rocblas.h" #include "../include/complex.hpp" #include "hipsolver.h" using rocblas_float_complex = hipsolverComplex; using rocblas_double_complex = hipsolverDoubleComplex; /* LAPACK fortran library functionality */ extern "C" { float slange_(char* norm_type, int* m, int* n, float* A, int* lda, float* work); double dlange_(char* norm_type, int* m, int* n, double* A, int* lda, double* work); float clange_(char* norm_type, int* m, int* n, rocblas_float_complex* A, int* lda, float* work); double zlange_(char* norm_type, int* m, int* n, rocblas_double_complex* A, int* lda, double* work); void daxpy_(int* n, double* alpha, double* x, int* incx, double* y, int* incy); void zaxpy_(int* n, rocblas_double_complex* alpha, rocblas_double_complex* x, int* incx, rocblas_double_complex* y, int* incy); } inline float xlange(char* norm_type, int* m, int* n, float* A, int* lda, float* work) { return slange_(norm_type, m, n, A, lda, work); } inline double xlange(char* norm_type, int* m, int* n, double* A, int* lda, double* work) { return dlange_(norm_type, m, n, A, lda, work); } inline float xlange(char* norm_type, int* m, int* n, rocblas_float_complex* A, int* lda, float* work) { return clange_(norm_type, m, n, A, lda, work); } inline double xlange(char* norm_type, int* m, int* n, rocblas_double_complex* A, int* lda, double* work) { return zlange_(norm_type, m, n, A, lda, work); } inline void xaxpy(int* n, double* alpha, double* x, int* incx, double* y, int* incy) { return daxpy_(n, alpha, x, incx, y, incy); } inline void xaxpy(int* n, rocblas_double_complex* alpha, rocblas_double_complex* x, int* incx, rocblas_double_complex* y, int* incy) { return zaxpy_(n, alpha, x, incx, y, incy); } /* Norm of error functions */ template , int> = 0> double norm_error(char norm_type, rocblas_int M, rocblas_int N, rocblas_int lda_gold, T* gold, T* comp, 
rocblas_int lda_comp = 0) { // norm type can be 'O', 'I', 'F', 'o', 'i', 'f' for one, infinity or // Frobenius norm one norm is max column sum infinity norm is max row sum // Frobenius is l2 norm of matrix entries rocblas_int lda = M; lda_comp = lda_comp > 0 ? lda_comp : lda_gold; host_vector gold_double(N * lda); host_vector comp_double(N * lda); for(rocblas_int i = 0; i < M; i++) { for(rocblas_int j = 0; j < N; j++) { gold_double[i + j * lda] = double(gold[i + j * lda_gold]); comp_double[i + j * lda] = double(comp[i + j * lda_comp]); } } double work[M]; rocblas_int incx = 1; double alpha = -1.0; rocblas_int size = lda * N; double gold_norm = xlange(&norm_type, &M, &N, gold_double.data(), &lda, work); xaxpy(&size, &alpha, gold_double.data(), &incx, comp_double.data(), &incx); double error = xlange(&norm_type, &M, &N, comp_double.data(), &lda, work); if(gold_norm > 0) error /= gold_norm; return error; } template , int> = 0> double norm_error(char norm_type, rocblas_int M, rocblas_int N, rocblas_int lda_gold, T* gold, T* comp, rocblas_int lda_comp = 0) { // norm type can be 'O', 'I', 'F', 'o', 'i', 'f' for one, infinity or // Frobenius norm one norm is max column sum infinity norm is max row sum // Frobenius is l2 norm of matrix entries rocblas_int lda = M; lda_comp = lda_comp > 0 ? 
lda_comp : lda_gold; host_vector gold_double(N * lda); host_vector comp_double(N * lda); for(rocblas_int i = 0; i < M; i++) { for(rocblas_int j = 0; j < N; j++) { gold_double[i + j * lda] = rocblas_double_complex(std::real(gold[i + j * lda_gold]), std::imag(gold[i + j * lda_gold])); comp_double[i + j * lda] = rocblas_double_complex(std::real(comp[i + j * lda_comp]), std::imag(comp[i + j * lda_comp])); } } double work[M]; rocblas_int incx = 1; rocblas_double_complex alpha = -1.0; rocblas_int size = lda * N; double gold_norm = xlange(&norm_type, &M, &N, gold_double.data(), &lda, work); xaxpy(&size, &alpha, gold_double.data(), &incx, comp_double.data(), &incx); double error = xlange(&norm_type, &M, &N, comp_double.data(), &lda, work); if(gold_norm > 0) error /= gold_norm; return error; } template double norm_error_upperTr( char norm_type, rocblas_int M, rocblas_int N, rocblas_int lda, T* gold, T* comp) { for(rocblas_int i = 0; i < M; ++i) { for(rocblas_int j = 0; j < N; ++j) { if(i > j) { gold[i + j * lda] = T(0); comp[i + j * lda] = T(0); } } } return norm_error(norm_type, M, N, lda, gold, comp); } template double norm_error_lowerTr( char norm_type, rocblas_int M, rocblas_int N, rocblas_int lda, T* gold, T* comp) { for(rocblas_int i = 0; i < M; ++i) { for(rocblas_int j = 0; j < N; ++j) { if(i < j) { gold[i + j * lda] = T(0); comp[i + j * lda] = T(0); } } } return norm_error(norm_type, M, N, lda, gold, comp); } template S snorm(char norm_type, rocblas_int m, rocblas_int n, T* A, rocblas_int lda) { return xlange(&norm_type, &m, &n, A, &lda, (S*)nullptr); } hipSOLVER-rocm-5.5.1/clients/rocsolvercommon/rocsolver_arguments.hpp000066400000000000000000000167621436107207300257160ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #pragma once #include "../rocblascommon/program_options.hpp" #include #include using rocblas_int = int; using rocblas_stride = ptrdiff_t; using variables_map = roc::variables_map; using variable_value = roc::variable_value; class Arguments : private std::map { using base = std::map; // names of arguments that have not yet been used by tests std::set to_consume; public: // test options rocblas_int norm_check = 0; rocblas_int unit_check = 1; rocblas_int timing = 0; rocblas_int perf = 0; rocblas_int singular = 0; rocblas_int iters = 5; rocblas_int mem_query = 0; rocblas_int batch_count = 1; // get and set function arguments template const T& peek(const std::string& name) const { return at(name).as(); } template const T& get(const std::string& name) { to_consume.erase(name); auto val = find(name); if(val != end() && !val->second.empty()) return val->second.as(); else throw std::invalid_argument("No value provided for " + name); } template const T get(const std::string& name, const T& default_value) { to_consume.erase(name); auto val = find(name); if(val != end() && !val->second.empty() && !val->second.defaulted()) return val->second.as(); else return default_value; } template void set(const std::string& name, const T& val) { to_consume.insert(name); base::operator[](name) = variable_value(val, false); } void populate(const variables_map& vm) { for(auto& pair : vm) { base::operator[](pair.first) = pair.second; if(!pair.second.empty() && !pair.second.defaulted()) to_consume.insert(pair.first); } // remove test arguments to_consume.erase("help"); to_consume.erase("function"); to_consume.erase("precision"); to_consume.erase("batch_count"); to_consume.erase("verify"); to_consume.erase("iters"); to_consume.erase("perf"); to_consume.erase("singular"); to_consume.erase("device"); } void clear() { to_consume.clear(); base::clear(); } // validate function arguments void validate_precision(const 
std::string& name) const { auto val = find(name); if(val == end()) return; char precision = val->second.as(); if(precision != 's' && precision != 'd' && precision != 'c' && precision != 'z') throw std::invalid_argument("Invalid value for " + name); } void validate_operation(const std::string& name) const { auto val = find(name); if(val == end()) return; char trans = val->second.as(); if(trans != 'N' && trans != 'T' && trans != 'C') throw std::invalid_argument("Invalid value for " + name); } void validate_side(const std::string& name) const { auto val = find(name); if(val == end()) return; char side = val->second.as(); if(side != 'L' && side != 'R' && side != 'B') throw std::invalid_argument("Invalid value for " + name); } void validate_fill(const std::string& name) const { auto val = find(name); if(val == end()) return; char uplo = val->second.as(); if(uplo != 'U' && uplo != 'L' && uplo != 'F') throw std::invalid_argument("Invalid value for " + name); } void validate_direct(const std::string& name) const { auto val = find(name); if(val == end()) return; char direct = val->second.as(); if(direct != 'F' && direct != 'B') throw std::invalid_argument("Invalid value for " + name); } void validate_storev(const std::string& name) const { auto val = find(name); if(val == end()) return; char storev = val->second.as(); if(storev != 'R' && storev != 'C') throw std::invalid_argument("Invalid value for " + name); } void validate_svect(const std::string& name) const { auto val = find(name); if(val == end()) return; char svect = val->second.as(); if(svect != 'A' && svect != 'S' && svect != 'O' && svect != 'N') throw std::invalid_argument("Invalid value for " + name); } void validate_workmode(const std::string& name) const { auto val = find(name); if(val == end()) return; char workmode = val->second.as(); if(workmode != 'O' && workmode != 'I') throw std::invalid_argument("Invalid value for " + name); } void validate_evect(const std::string& name) const { auto val = find(name); 
if(val == end()) return; char evect = val->second.as(); if(evect != 'V' && evect != 'I' && evect != 'N') throw std::invalid_argument("Invalid value for " + name); } void validate_erange(const std::string name) const { auto val = find(name); if(val == end()) return; char range = val->second.as(); if(range != 'A' && range != 'V' && range != 'I') throw std::invalid_argument("Invalid value for " + name); } void validate_itype(const std::string& name) const { auto val = find(name); if(val == end()) return; char itype = val->second.as(); if(itype != '1' && itype != '2' && itype != '3') throw std::invalid_argument("Invalid value for " + name); } void validate_consumed() const { if(to_consume.size() > 0) { std::stringstream ss; ss << "Not all arguments were consumed:"; for(std::string name : to_consume) ss << ' ' << name; throw std::invalid_argument(ss.str()); } } }; hipSOLVER-rocm-5.5.1/clients/rocsolvercommon/rocsolver_test.hpp000066400000000000000000000077601436107207300246660ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #pragma once #include #include #include #include template constexpr double get_epsilon() { using S = decltype(std::real(T{})); return std::numeric_limits::epsilon(); } template constexpr double get_safemin() { using S = decltype(std::real(T{})); auto eps = get_epsilon(); auto s1 = std::numeric_limits::min(); auto s2 = 1 / std::numeric_limits::max(); if(s2 > s1) return s2 * (1 + eps); return s1; } #ifdef GOOGLE_TEST #define ROCSOLVER_TEST_CHECK(T, max_error, tol) ASSERT_LE((max_error), (tol)*get_epsilon()) #else #define ROCSOLVER_TEST_CHECK(T, max_error, tol) #endif typedef enum rocsolver_inform_type_ { inform_quick_return, inform_invalid_size, inform_invalid_args, inform_mem_query, } rocsolver_inform_type; inline void rocsolver_bench_inform(rocsolver_inform_type it, size_t arg = 0) { switch(it) { case inform_quick_return: printf("Quick return...\n"); break; case inform_invalid_size: printf("Invalid size arguments...\n"); break; case inform_invalid_args: printf("Invalid value in arguments...\n"); break; case inform_mem_query: printf("%li bytes of device memory are required...\n", arg); break; } printf("No performance data to collect.\n"); printf("No computations to verify.\n"); std::fflush(stdout); } inline void rocsolver_bench_output() { // empty version std::cerr << std::endl; } template inline void rocsolver_bench_output(T arg, Ts... 
args) { std::stringstream ss; ss << std::left << std::setw(15) << arg; std::cerr << ss.str(); if(sizeof...(Ts) > 0) std::cerr << ' '; rocsolver_bench_output(args...); } // template , int> = 0> // inline T sconj(T scalar) // { // return scalar; // } // template , int> = 0> // inline T sconj(T scalar) // { // return std::conj(scalar); // } // // A struct implicity convertable to and from char, used so we can customize // // Google Test printing for LAPACK char arguments without affecting the default // // char output. // struct rocsolver_op_char // { // rocsolver_op_char(char c) // : data(c) // { // } // operator char() const // { // return data; // } // char data; // }; // // gtest printers // inline std::ostream& operator<<(std::ostream& os, rocblas_status x) // { // return os << rocblas_status_to_string(x); // } // inline std::ostream& operator<<(std::ostream& os, rocsolver_op_char x) // { // return os << x.data; // } hipSOLVER-rocm-5.5.1/clients/samples/000077500000000000000000000000001436107207300173035ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/clients/samples/CMakeLists.txt000066400000000000000000000060651436107207300220520ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # ######################################################################## add_executable( example-c-basic example_basic.c ) add_executable( example-cpp-basic example_basic.cpp ) # We test for C99 compatibility in the example_basic.c test set_source_files_properties(example_basic.c PROPERTIES LANGUAGE CXX) set_source_files_properties(example_basic.c PROPERTIES COMPILE_FLAGS "-xc -std=c99") # Test for C++11 compatibility in one of the samples set_property(TARGET example-cpp-basic PROPERTY CXX_STANDARD 11) if( NOT TARGET hipsolver ) find_package( hipsolver CONFIG PATHS ${ROCM_PATH}/hipsolver ) if( NOT hipsolver_FOUND ) message( FATAL_ERROR "hipSOLVER is a required dependency and is not found; try adding hipSOLVER path to CMAKE_PREFIX_PATH") endif( ) endif( ) foreach( exe example-c-basic;example-cpp-basic; ) # External header includes included as SYSTEM files target_include_directories( ${exe} SYSTEM PRIVATE $ ) target_include_directories( ${exe} PRIVATE $ ) target_compile_options( ${exe} PRIVATE -mf16c) target_link_libraries( ${exe} PRIVATE roc::hipsolver ) if( NOT USE_CUDA ) target_link_libraries( ${exe} PRIVATE hip::host ) if( CUSTOM_TARGET ) target_link_libraries( ${exe} PRIVATE hip::${CUSTOM_TARGET} ) endif( ) else( ) target_compile_definitions( ${exe} PRIVATE __HIP_PLATFORM_NVCC__ ) target_include_directories( ${exe} PRIVATE $ ) target_link_libraries( ${exe} PRIVATE ${CUDA_LIBRARIES} ) endif( ) set_target_properties(${exe} PROPERTIES LINKER_LANGUAGE CXX) set_target_properties( ${exe} PROPERTIES 
DEBUG_POSTFIX "-d" CXX_EXTENSIONS NO ) set_target_properties( ${exe} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) endforeach( ) hipSOLVER-rocm-5.5.1/clients/samples/example_basic.c000066400000000000000000000114621436107207300222470ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * * ************************************************************************ */ #include // for hip functions #include // for all the hipsolver C interfaces and type declarations #include // for printf #include // for malloc // Example: Compute the LU Factorization of a matrix on the GPU double* create_example_matrix(int* M_out, int* N_out, int* lda_out) { // a *very* small example input; not a very efficient use of the API const double A[3][3] = {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}}; const int M = 3; const int N = 3; const int lda = 3; *M_out = M; *N_out = N; *lda_out = lda; // note: matrices must be stored in column major format, // i.e. entry (i,j) should be accessed by hA[i + j*lda] double* hA = malloc(sizeof(double) * lda * N); for(size_t i = 0; i < M; ++i) { for(size_t j = 0; j < N; ++j) { // copy A (2D array) into hA (1D array, column-major) hA[i + j * lda] = A[i][j]; } } return hA; } // We use hipsolverDgetrf to factor a real M-by-N matrix, A. int main() { int M; // rows int N; // cols int lda; // leading dimension double* hA = create_example_matrix(&M, &N, &lda); // input matrix on CPU // let's print the input matrix, just to see it printf("A = [\n"); for(size_t i = 0; i < M; ++i) { printf(" "); for(size_t j = 0; j < N; ++j) { printf("% .3f ", hA[i + j * lda]); } printf(";\n"); } printf("]\n"); // initialization hipsolverHandle_t handle; hipsolverCreate(&handle); // calculate the sizes of our arrays size_t size_piv = (M < N) ? 
M : N; // count of pivot indices size_t size_A = (size_t)lda * N; // count of elements in matrix A // allocate memory on GPU int* dInfo; int* dIpiv; double* dA; hipMalloc((void**)&dInfo, sizeof(int)); hipMalloc((void**)&dIpiv, sizeof(int) * size_piv); hipMalloc((void**)&dA, sizeof(double) * size_A); // copy data to GPU hipMemcpy(dA, hA, sizeof(double) * size_A, hipMemcpyHostToDevice); // create the workspace double* dWork; int size_work; // size of workspace to pass to getrf hipsolverDgetrf_bufferSize(handle, M, N, dA, lda, &size_work); hipMalloc((void**)&dWork, size_work); // compute the LU factorization on the GPU hipsolverStatus_t status = hipsolverDgetrf(handle, M, N, dA, lda, dWork, size_work, dIpiv, dInfo); if(status != HIPSOLVER_STATUS_SUCCESS) return status; // copy the results back to CPU int* hInfo = malloc(sizeof(int)); // provides information about algorithm completion int* hIpiv = malloc(sizeof(int) * size_piv); // array for pivot indices on CPU hipMemcpy(hInfo, dInfo, sizeof(int), hipMemcpyDeviceToHost); hipMemcpy(hIpiv, dIpiv, sizeof(int) * size_piv, hipMemcpyDeviceToHost); hipMemcpy(hA, dA, sizeof(double) * size_A, hipMemcpyDeviceToHost); // the results are now in hA and hIpiv // we can print some of the results if we want to see them printf("U = [\n"); for(size_t i = 0; i < M; ++i) { printf(" "); for(size_t j = 0; j < N; ++j) { printf("% .3f ", (i <= j) ? hA[i + j * lda] : 0); } printf(";\n"); } printf("]\n"); // clean up free(hInfo); free(hIpiv); free(hA); hipFree(dWork); hipFree(dInfo); hipFree(dIpiv); hipFree(dA); hipsolverDestroy(handle); } hipSOLVER-rocm-5.5.1/clients/samples/example_basic.cpp000066400000000000000000000113121436107207300226010ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * ************************************************************************ */ #include // for std::min #include // for hip functions #include // for all the hipsolver C interfaces and type declarations #include // for size_t, printf #include // Example: Compute the LU Factorization of a matrix on the GPU void get_example_matrix(std::vector& hA, int& M, int& N, int& lda) { // a *very* small example input; not a very efficient use of the API const double A[3][3] = {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}}; M = 3; N = 3; lda = 3; // note: matrices must be stored in column major format, // i.e. entry (i,j) should be accessed by hA[i + j*lda] hA.resize(size_t(lda) * N); for(size_t i = 0; i < M; ++i) { for(size_t j = 0; j < N; ++j) { // copy A (2D array) into hA (1D array, column-major) hA[i + j * lda] = A[i][j]; } } } // We use hipsolverDgetrf to factor a real M-by-N matrix, A. 
int main() { int M; // rows int N; // cols int lda; // leading dimension std::vector hA; // input matrix on CPU get_example_matrix(hA, M, N, lda); // let's print the input matrix, just to see it printf("A = [\n"); for(size_t i = 0; i < M; ++i) { printf(" "); for(size_t j = 0; j < N; ++j) { printf("% .3f ", hA[i + j * lda]); } printf(";\n"); } printf("]\n"); // initialization hipsolverHandle_t handle; hipsolverCreate(&handle); // calculate the sizes of our arrays size_t size_piv = size_t(std::min(M, N)); // count of pivot indices size_t size_A = size_t(lda) * N; // count of elements in matrix A // allocate memory on GPU int* dInfo; int* dIpiv; double* dA; hipMalloc(&dInfo, sizeof(int)); hipMalloc(&dIpiv, sizeof(int) * size_piv); hipMalloc(&dA, sizeof(double) * size_A); // copy data to GPU hipMemcpy(dA, hA.data(), sizeof(double) * size_A, hipMemcpyHostToDevice); // create the workspace double* dWork; int size_work; // size of workspace to pass to getrf hipsolverDgetrf_bufferSize(handle, M, N, dA, lda, &size_work); hipMalloc(&dWork, size_work); // compute the LU factorization on the GPU hipsolverStatus_t status = hipsolverDgetrf(handle, M, N, dA, lda, dWork, size_work, dIpiv, dInfo); if(status != HIPSOLVER_STATUS_SUCCESS) return status; // copy the results back to CPU std::vector hInfo(1); // provides information about algorithm completion std::vector hIpiv(size_piv); // array for pivot indices on CPU hipMemcpy(hInfo.data(), dInfo, sizeof(int), hipMemcpyDeviceToHost); hipMemcpy(hIpiv.data(), dIpiv, sizeof(int) * size_piv, hipMemcpyDeviceToHost); hipMemcpy(hA.data(), dA, sizeof(double) * size_A, hipMemcpyDeviceToHost); // the results are now in hA and hIpiv // we can print some of the results if we want to see them printf("U = [\n"); for(size_t i = 0; i < M; ++i) { printf(" "); for(size_t j = 0; j < N; ++j) { printf("% .3f ", (i <= j) ? 
hA[i + j * lda] : 0); } printf(";\n"); } printf("]\n"); // clean up hipFree(dWork); hipFree(dInfo); hipFree(dIpiv); hipFree(dA); hipsolverDestroy(handle); } hipSOLVER-rocm-5.5.1/cmake/000077500000000000000000000000001436107207300152565ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/cmake/armor-config.cmake000066400000000000000000000041651436107207300206510ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # ######################################################################## # Enables increasingly expensive runtime correctness checks # 0 - Nothing # 1 - Inexpensive correctness checks (extra assertions, etc..) # Note: Some checks are added by the optimizer, so it can help to build # with optimizations enabled. e.g. 
-Og # 2 - Expensive correctness checks (debug iterators) macro( add_armor_flags target level ) if( UNIX AND "${level}" GREATER "0" ) if( "${level}" GREATER "1" ) # Building with std debug iterators is enabled by the defines below, but # requires building C++ dependencies with the same defines. target_compile_definitions( ${target} PRIVATE _GLIBCXX_DEBUG ) endif( ) target_compile_definitions( ${target} PRIVATE $<$>:_FORTIFY_SOURCE=1> # requires optimizations to work _GLIBCXX_ASSERTIONS ) endif( ) endmacro( ) hipSOLVER-rocm-5.5.1/cmake/get-cli-arguments.cmake000066400000000000000000000020001436107207300215770ustar00rootroot00000000000000# Attempt (best effort) to return a list of user specified parameters cmake was invoked with # NOTE: Even if the user specifies CMAKE_INSTALL_PREFIX on the command line, the parameter is # not returned because it does not have the matching helpstring function( append_cmake_cli_arguments initial_cli_args return_cli_args ) # Retrieves the contents of CMakeCache.txt get_cmake_property( cmake_properties CACHE_VARIABLES ) foreach( property ${cmake_properties} ) get_property(help_string CACHE ${property} PROPERTY HELPSTRING ) # Properties specified on the command line have boilerplate text if( help_string MATCHES "variable specified on the command line" ) # message( STATUS "property: ${property}") # message( STATUS "value: ${${property}}") list( APPEND cli_args "-D${property}=${${property}}") endif( ) endforeach( ) # message( STATUS "get_command_line_arguments: ${cli_args}") set( ${return_cli_args} ${${initial_cli_args}} ${cli_args} PARENT_SCOPE ) endfunction( ) hipSOLVER-rocm-5.5.1/custom.properties000066400000000000000000000001401436107207300176210ustar00rootroot00000000000000booktitle=hipSOLVER API Guide spreadsheet.xml=docs/classification-map.xml 
document.locale=enushipSOLVER-rocm-5.5.1/deps/000077500000000000000000000000001436107207300151315ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/deps/CMakeLists.txt000066400000000000000000000111631436107207300176730ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# # ######################################################################## # Helper cmake script to automate building dependencies for hipsolver # This script can be invoked manually by the user with 'cmake -P' # The ROCm platform requires Ubuntu 16.04 or Fedora 24, which has cmake 3.5 cmake_minimum_required( VERSION 3.5 ) list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../cmake ) # Consider removing this in the future # It can be annoying for visual studio developers to build a project that tries to install into 'program files' if( WIN32 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT ) set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" FORCE ) endif( ) # This has to be initialized before the project() command appears # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." 
) endif() if( NOT DEFINED CMAKE_Fortran_COMPILER AND NOT DEFINED ENV{FC} ) set( CMAKE_Fortran_COMPILER "gfortran" ) endif() # The superbuild does not build anything itself; all compiling is done in external projects project( hipsolver-dependencies NONE ) option( BUILD_BOOST "Download and build boost library" ON ) option( BUILD_GTEST "Download and build googletest library" ON ) option( BUILD_LAPACK "Download and build lapack library" ON ) # option( BUILD_VERBOSE "Print helpful build debug information" OFF ) # if( BUILD_VERBOSE ) # message( STATUS "CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}" ) # message( STATUS "CMAKE_BINARY_DIR: ${CMAKE_BINARY_DIR}" ) # message( STATUS "CMAKE_SOURCE_DIR: ${CMAKE_SOURCE_DIR}" ) # message( STATUS "CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}" ) # message( STATUS "CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}" ) # message( STATUS "CMAKE_CURRENT_LIST_DIR: ${CMAKE_CURRENT_LIST_DIR}" ) # message( STATUS "CMAKE_CURRENT_LIST_FILE: ${CMAKE_CURRENT_LIST_FILE}" ) # endif( ) # This module scrapes the CMakeCache.txt file and attempts to get all the cli options the user specified to cmake invocation include( get-cli-arguments ) # The following is a series of super-build projects; this cmake project will download and build if( BUILD_GTEST ) include( external-gtest ) list( APPEND hipsolver_dependencies googletest ) set( gtest_custom_target COMMAND cd ${GTEST_BINARY_ROOT}$ ${CMAKE_COMMAND} --build . --target install ) endif( ) if( BUILD_LAPACK ) include( external-lapack ) list( APPEND hipsolver_dependencies lapack ) set( lapack_custom_target COMMAND cd ${LAPACK_BINARY_ROOT}$ ${CMAKE_COMMAND} --build . 
--target install ) endif( ) if( BUILD_BOOST ) include( external-boost ) list( APPEND hipsolver_dependencies boost ) set( boost_custom_target COMMAND cd ${BOOST_BINARY_ROOT}$ ${Boost.Command} install ) endif( ) # POLICY CMP0037 - "Target names should not be reserved and should match a validity pattern" # Familiar target names like 'install' should be OK at the super-build level if( POLICY CMP0037 ) cmake_policy( SET CMP0037 OLD ) endif( ) add_custom_target( install ${boost_custom_target} ${gtest_custom_target} ${lapack_custom_target} DEPENDS ${hipsolver_dependencies} ) hipSOLVER-rocm-5.5.1/deps/external-boost.cmake000066400000000000000000000171101436107207300211010ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# # ######################################################################## message( STATUS "Configuring boost external dependency" ) include( ExternalProject ) set( PREFIX_BOOST ${CMAKE_INSTALL_PREFIX} CACHE PATH "Location where boost should install, defaults to /usr/local" ) # We need to detect the compiler the user is attempting to invoke with CMake, # we do our best to translate cmake parameters into bjam parameters enable_language( CXX ) include( build-bitness ) # TODO: Options should be added to allow downloading Boost straight from github # This file is used to add Boost as a library dependency to another project # This sets up boost to download from sourceforge, and builds it as a cmake # ExternalProject # Change this one line to upgrade to newer versions of boost set( ext.Boost_VERSION "1.64.0" CACHE STRING "Boost version to download/use" ) mark_as_advanced( ext.Boost_VERSION ) string( REPLACE "." "_" ext.Boost_Version_Underscore ${ext.Boost_VERSION} ) message( STATUS "ext.Boost_VERSION: " ${ext.Boost_VERSION} ) if( WIN32 ) # For newer cmake versions, 7z archives are much smaller to download if( CMAKE_VERSION VERSION_LESS "3.1.0" ) set( Boost_Ext "zip" ) else( ) set( Boost_Ext "7z" ) endif( ) else( ) set( Boost_Ext "tar.bz2" ) endif( ) if( WIN32 ) set( Boost.Command b2 --prefix=${PREFIX_BOOST} ) else( ) set( Boost.Command ./b2 --prefix=${PREFIX_BOOST} ) endif( ) if( CMAKE_COMPILER_IS_GNUCXX ) list( APPEND Boost.Command cxxflags=-fPIC -std=c++11 ) elseif( XCODE_VERSION OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang") ) list( APPEND Boost.Command cxxflags=-std=c++11 -stdlib=libc++ linkflags=-stdlib=libc++ ) endif( ) include( ProcessorCount ) ProcessorCount( Cores ) if( NOT Cores EQUAL 0 ) # Travis can fail to build Boost sporadically; uses 32 cores, reduce stress on VM if( DEFINED ENV{TRAVIS} ) if( Cores GREATER 8 ) set( Cores 8 ) endif( ) endif( ) # Add build thread in addition to the number of cores that we have math( EXPR Cores "${Cores} + 1 " ) else( ) # If 
we could not detect # of cores, assume 1 core and add an additional build thread set( Cores "2" ) endif( ) message( STATUS "ExternalBoost using ( " ${Cores} " ) cores to build with" ) message( STATUS "ExternalBoost building [ program_options, serialization, filesystem, system, regex ] components" ) list( APPEND Boost.Command -j ${Cores} --with-program_options --with-serialization --with-filesystem --with-system --with-regex ) if( BUILD_64 ) list( APPEND Boost.Command address-model=64 ) else( ) list( APPEND Boost.Command address-model=32 ) endif( ) if( MSVC10 ) list( APPEND Boost.Command toolset=msvc-10.0 ) elseif( MSVC11 ) list( APPEND Boost.Command toolset=msvc-11.0 ) elseif( MSVC12 ) list( APPEND Boost.Command toolset=msvc-12.0 ) elseif( MSVC14 ) list( APPEND Boost.Command toolset=msvc-14.0 ) elseif( XCODE_VERSION OR ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" ) ) list( APPEND Boost.Command toolset=clang ) elseif( CMAKE_COMPILER_IS_GNUCXX ) list( APPEND Boost.Command toolset=gcc ) endif( ) if( WIN32 AND (ext.Boost_VERSION VERSION_LESS "1.60.0") ) list( APPEND Boost.Command define=BOOST_LOG_USE_WINNT6_API ) endif( ) if( NOT DEFINED ext.Boost_LINK ) if( ${BUILD_SHARED_LIBS} MATCHES "ON" ) set( ext.Boost_LINK "shared" CACHE STRING "Which boost link method? static | shared | static,shared" ) else( ) set( ext.Boost_LINK "static" CACHE STRING "Which boost link method? static | shared | static,shared" ) endif( ) endif() mark_as_advanced( ext.Boost_LINK ) if( WIN32 ) # Versioned is the default on windows set( ext.Boost_LAYOUT "versioned" CACHE STRING "Which boost layout method? versioned | tagged | system" ) # For windows, default to build both variants to support the VS IDE set( ext.Boost_VARIANT "debug,release" CACHE STRING "Which boost variant? debug | release | debug,release" ) else( ) # Tagged builds provide unique enough names to be able to build both variants set( ext.Boost_LAYOUT "tagged" CACHE STRING "Which boost layout method? 
versioned | tagged | system" ) # For Linux, typically a build tree only needs one variant if( ${CMAKE_BUILD_TYPE} MATCHES "Debug") set( ext.Boost_VARIANT "debug" CACHE STRING "Which boost variant? debug | release | debug,release" ) else( ) set( ext.Boost_VARIANT "release" CACHE STRING "Which boost variant? debug | release | debug,release" ) endif( ) endif( ) mark_as_advanced( ext.Boost_LAYOUT ) mark_as_advanced( ext.Boost_VARIANT ) list( APPEND Boost.Command --layout=${ext.Boost_LAYOUT} link=${ext.Boost_LINK} variant=${ext.Boost_VARIANT} ) message( STATUS "Boost.Command: ${Boost.Command}" ) # If the user has a cached local copy stored somewhere, they can define the full path to the package in a BOOST_URL environment variable if( DEFINED ENV{BOOST_URL} ) set( ext.Boost_URL "$ENV{BOOST_URL}" CACHE STRING "URL to download Boost from" ) else( ) set( ext.Boost_URL "http://sourceforge.net/projects/boost/files/boost/${ext.Boost_VERSION}/boost_${ext.Boost_Version_Underscore}.${Boost_Ext}/download" CACHE STRING "URL to download Boost from" ) endif( ) mark_as_advanced( ext.Boost_URL ) set( Boost.Bootstrap "" ) set( ext.HASH "" ) if( WIN32 ) set( Boost.Bootstrap "bootstrap.bat" ) if( CMAKE_VERSION VERSION_LESS "3.1.0" ) # .zip file set( ext.HASH "b99973c805f38b549dbeaf88701c0abeff8b0e8eaa4066df47cac10a32097523" ) else( ) # .7z file set( ext.HASH "49c6abfeb5b480f6a86119c0d57235966b4690ee6ff9e6401ee868244808d155" ) endif( ) else( ) set( Boost.Bootstrap "./bootstrap.sh" ) # .tar.bz2 set( ext.HASH "7bcc5caace97baa948931d712ea5f37038dbb1c5d89b43ad4def4ed7cb683332" ) if( XCODE_VERSION OR ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" ) ) list( APPEND Boost.Bootstrap --with-toolset=clang ) endif( ) endif( ) # Below is a fancy CMake command to download, build and install Boost on the users computer ExternalProject_Add( boost PREFIX ${CMAKE_BINARY_DIR}/boost URL ${ext.Boost_URL} URL_HASH SHA256=${ext.HASH} UPDATE_COMMAND ${Boost.Bootstrap} LOG_UPDATE 1 CONFIGURE_COMMAND "" BUILD_COMMAND 
${Boost.Command} stage BUILD_IN_SOURCE 1 LOG_BUILD 1 INSTALL_COMMAND "" ) set_property( TARGET boost PROPERTY FOLDER "extern" ) ExternalProject_Get_Property( boost install_dir ) ExternalProject_Get_Property( boost binary_dir ) # For use by the user of ExternalGtest.cmake set( BOOST_INSTALL_ROOT ${install_dir} ) set( BOOST_BINARY_ROOT ${binary_dir} ) hipSOLVER-rocm-5.5.1/deps/external-gtest.cmake000066400000000000000000000115011436107207300210770ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#
# ########################################################################

# Downloads and builds googletest as an ExternalProject target named `googletest`.
# Variables left behind for the including script:
#   GTEST_INSTALL_ROOT - install_dir property of the external project
#   GTEST_BINARY_ROOT  - binary_dir property of the external project

message( STATUS "Configuring gtest external dependency" )
include( ExternalProject )

# set( gtest_cmake_args -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>/package )
set( PREFIX_GTEST ${CMAKE_INSTALL_PREFIX} CACHE PATH "Location where gtest should install, defaults to /usr/local" )
set( gtest_cmake_args -DCMAKE_INSTALL_PREFIX=${PREFIX_GTEST} )
# NOTE(review): append_cmake_cli_arguments is defined elsewhere in the project;
# presumably it forwards the user's command-line cache arguments - confirm.
append_cmake_cli_arguments( gtest_cmake_args gtest_cmake_args )

set( gtest_git_repository "https://github.com/google/googletest.git" CACHE STRING "URL to download gtest from" )
set( gtest_git_tag "release-1.11.0" CACHE STRING "Git tag of gtest to check out" )

if( MSVC )
  list( APPEND gtest_cmake_args -Dgtest_force_shared_crt=ON -DCMAKE_DEBUG_POSTFIX=d )
# else( )
  # GTEST_USE_OWN_TR1_TUPLE necessary to compile with hipcc
#  list( APPEND gtest_cmake_args -DGTEST_USE_OWN_TR1_TUPLE=1 )
endif( )

if( CMAKE_CONFIGURATION_TYPES )
  # Multi-config generator: build both configurations.
  # <BINARY_DIR> is substituted by ExternalProject with the external build directory.
  set( gtest_make
        COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --config Release
        COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --config Debug
  )
else( )
  # Add build thread in addition to the number of cores that we have
  include( ProcessorCount )
  ProcessorCount( Cores )

  # If we are not using an IDE, assume nmake with visual studio
  if( MSVC )
    set( gtest_make "nmake" )
  else( )
    set( gtest_make "make" )

    # The -j parameter does not work with nmake
    if( NOT Cores EQUAL 0 )
      math( EXPR Cores "${Cores} + 1 " )
      list( APPEND gtest_make -j ${Cores} )
    else( )
      # If we could not detect # of cores, assume 1 core and add an additional build thread
      list( APPEND gtest_make -j 2 )
    endif( )
  endif( )

  message( STATUS "ExternalGmock using ( " ${Cores} " ) cores to build with" )
endif( )

# message( STATUS "gtest_make ( " ${gtest_make} " ) " )
# message( STATUS "gtest_cmake_args ( " ${gtest_cmake_args} " ) " )

# Master branch has a new structure that combines googletest with googlemock
ExternalProject_Add(
  googletest
  PREFIX ${CMAKE_BINARY_DIR}/gtest
  GIT_REPOSITORY ${gtest_git_repository}
  GIT_TAG ${gtest_git_tag}
  CMAKE_ARGS ${gtest_cmake_args}
  BUILD_COMMAND ${gtest_make}
  LOG_BUILD 1
  INSTALL_COMMAND ""
  LOG_INSTALL 1
)

ExternalProject_Get_Property( googletest source_dir )

# For visual studio, the path 'debug' is hardcoded because that is the default VS configuration for a build.
# Doesn't matter if its the gtest or gtestd project above
set( package_dir "${PREFIX_GTEST}" )
if( CMAKE_CONFIGURATION_TYPES )
  # Create a package by bundling libraries and header files
  if( BUILD_64 )
    set( LIB_DIR lib64 )
  else( )
    set( LIB_DIR lib )
  endif( )

  # <BINARY_DIR> / <SOURCE_DIR> are expanded by ExternalProject inside the step's commands.
  set( gtest_lib_dir "<BINARY_DIR>/${LIB_DIR}" )
  ExternalProject_Add_Step( googletest createPackage
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${gtest_lib_dir}/Debug ${package_dir}/${LIB_DIR}
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${gtest_lib_dir}/Release ${package_dir}/${LIB_DIR}
    COMMAND ${CMAKE_COMMAND} -E copy_directory <SOURCE_DIR>/include ${package_dir}/include
    COMMAND ${CMAKE_COMMAND} -E copy_directory <SOURCE_DIR>/gtest/include/gtest ${package_dir}/include/gtest
    DEPENDEES install
  )
endif( )

set_property( TARGET googletest PROPERTY FOLDER "extern")
ExternalProject_Get_Property( googletest install_dir )
ExternalProject_Get_Property( googletest binary_dir )

# For use by the user of ExternalGtest.cmake
set( GTEST_INSTALL_ROOT ${install_dir} )
set( GTEST_BINARY_ROOT ${binary_dir} )
hipSOLVER-rocm-5.5.1/deps/external-lapack.cmake000066400000000000000000000060351436107207300212120ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#
# ########################################################################

# Downloads and builds reference LAPACK (with CBLAS) as an ExternalProject target
# named `lapack`. Variables left behind for the including script:
#   LAPACK_INSTALL_ROOT - install_dir property of the external project
#   LAPACK_BINARY_ROOT  - binary_dir property of the external project

message( STATUS "Configuring lapack external dependency" )
include( ExternalProject )

# set( lapack_cmake_args -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>/package )
set( PREFIX_LAPACK ${CMAKE_INSTALL_PREFIX} CACHE PATH "Location where lapack should install, defaults to /usr/local" )
set( lapack_cmake_args -DCMAKE_INSTALL_PREFIX=${PREFIX_LAPACK} )
# NOTE(review): append_cmake_cli_arguments is defined elsewhere in the project;
# presumably it forwards the user's command-line cache arguments - confirm.
append_cmake_cli_arguments( lapack_cmake_args lapack_cmake_args )

set( lapack_git_repository "https://github.com/Reference-LAPACK/lapack-release" CACHE STRING "URL to download lapack from" )
set( lapack_git_tag "lapack-3.7.1" CACHE STRING "git branch" )

# message( STATUS "lapack_make ( " ${lapack_make} " ) " )
# message( STATUS "lapack_cmake_args ( " ${lapack_cmake_args} " ) " )

enable_language( Fortran )
include( GNUInstallDirs )

# lapack cmake exports has a bug on debian architectures, they do not take into account the
# lib/<machine> paths
# if CMAKE_INSTALL_LIBDIR is of the form above, strip the machine
# Match against a '/' in CMAKE_INSTALL_LIBDIR, i.e. lib/x86_64-linux-gnu
# The variable is named rather than expanded so that if() never sees an empty
# left operand and the value is not dereferenced a second time.
if( CMAKE_INSTALL_LIBDIR MATCHES "lib/.*" )
  list( APPEND lapack_cmake_args "-DCMAKE_INSTALL_LIBDIR=lib" )
endif( )

ExternalProject_Add(
  lapack
  PREFIX ${CMAKE_BINARY_DIR}/lapack
  GIT_REPOSITORY ${lapack_git_repository}
  GIT_TAG ${lapack_git_tag}
  CMAKE_ARGS ${lapack_cmake_args} -DCBLAS=ON -DLAPACKE=OFF -DBUILD_TESTING=OFF -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER}
  LOG_BUILD 1
  INSTALL_COMMAND ""
  LOG_INSTALL 1
)

ExternalProject_Get_Property( lapack source_dir )

set_property( TARGET lapack PROPERTY FOLDER "extern" )
ExternalProject_Get_Property( lapack install_dir )
ExternalProject_Get_Property( lapack binary_dir )

# For use by the user of external-lapack.cmake
set( LAPACK_INSTALL_ROOT ${install_dir} )
set( LAPACK_BINARY_ROOT ${binary_dir} )
hipSOLVER-rocm-5.5.1/docs/000077500000000000000000000000001436107207300151265ustar00rootroot00000000000000hipSOLVER-rocm-5.5.1/docs/Doxyfile000066400000000000000000003212031436107207300166350ustar00rootroot00000000000000# Doxyfile 1.8.10 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. 
See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = "hipsolver" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = v0.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "ROCm SOLVER marshalling library" # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = ./rocmlogo.png # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = docBin # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. 
Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise cause # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. 
REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. 
# # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. 
Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. 
Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. In the later case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. 
# The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. 
If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = YES # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). 
# The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = YES # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. 
# Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. 
HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. 
If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. 
# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. 
GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). 
Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. 
QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). 
WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = ../library/include \ ../library/include/internal # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, # *.vhdl, *.ucf, *.qsf, *.as and *.js. 
FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.idl \ *.ddl \ *.odl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.cs \ *.d \ *.php \ *.php4 \ *.php5 \ *.phtml \ *.inc \ *.m \ *.markdown \ *.md \ *.mm \ *.dox \ *.py \ *.f90 \ *.f \ *.for \ *.tcl \ *.vhd \ *.vhdl \ *.ucf \ *.qsf \ *.as \ *.js # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. 
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match.
The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = ../README.md #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. 
INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. 
# # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # compiled with the --with-libclang option. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
CLANG_OPTIONS = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML, the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses.
# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. 
# Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. 
Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES.
DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). 
If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. 
For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. 
QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). 
For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 1 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. 
# Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes take effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side JavaScript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want the formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax.
The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow; in that case, # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use the access key + S # (what the access key is depends on the OS and browser, but it is typically # CTRL, ALT/option