pax_global_header00006660000000000000000000000064144520305420014511gustar00rootroot0000000000000052 comment=355907e6796ab69fb69157120718b58e67cbfe41 hipFFT-rocm-5.7.1/000077500000000000000000000000001445203054200136015ustar00rootroot00000000000000hipFFT-rocm-5.7.1/.clang-format000066400000000000000000000065421445203054200161630ustar00rootroot00000000000000# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: WebKit TabWidth: 4 IndentWidth: 4 UseTab: Never ColumnLimit: 100 # Other languages JavaScript, Proto --- Language: Cpp # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: Cpp11 AccessModifierOffset: -4 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: false AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: { AfterCaseLabel: 'true' AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' AfterFunction : 'true' AfterNamespace : 'true' AfterStruct : 'true' AfterUnion : 'true' BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' # AfterExternBlock : 'true' } 
#BreakAfterJavaFieldAnnotations: true #BreakBeforeInheritanceComma: false #BreakBeforeBinaryOperators: None #BreakBeforeTernaryOperators: true #BreakConstructorInitializersBeforeComma: true #BreakStringLiterals: true CommentPragmas: '^ IWYU pragma:' #CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true SpaceBeforeCpp11BracedList: false DerivePointerAlignment: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IndentCaseLabels: false IndentPPDirectives: None #FixNamespaceComments: true IndentWrappedFunctionNames: true KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' #JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 NamespaceIndentation: All ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: Never SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false #SpaceAfterTemplateKeyword: true #SpaceBeforeInheritanceColon: true #SortUsingDeclarations: true SortIncludes: true # Comments are for developers, they should arrange them ReflowComments: false #IncludeBlocks: Preserve --- hipFFT-rocm-5.7.1/.githooks/000077500000000000000000000000001445203054200155065ustar00rootroot00000000000000hipFFT-rocm-5.7.1/.githooks/install000077500000000000000000000002221445203054200170760ustar00rootroot00000000000000#!/usr/bin/env bash cd $(git rev-parse --git-dir) cd hooks echo "Installing hooks..." 
ln -s ../../.githooks/pre-commit pre-commit echo "Done!" hipFFT-rocm-5.7.1/.githooks/pre-commit000077500000000000000000000017661445203054200175220ustar00rootroot00000000000000#!/bin/sh # # This pre-commit hook checks if any versions of clang-format # are installed, and if so, uses the installed version to format # the staged changes. base=/opt/rocm/hcc/bin/clang-format format="" # Redirect output to stderr. exec 1>&2 # check if clang-format is installed type "$base" >/dev/null 2>&1 && format="$base" # no versions of clang-format are installed if [ -z "$format" ] then echo "$base is not installed. Pre-commit hook will not be executed." exit 0 fi # Do everything from top - level cd $(git rev-parse --show-toplevel) if git rev-parse --verify HEAD >/dev/null 2>&1 then against=HEAD else # Initial commit: diff against an empty tree object against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 fi # do the formatting for file in $(git diff-index --cached --name-only $against | grep -E '\.h$|\.hpp$|\.cpp$|\.cl$|\.h\.in$|\.hpp\.in$|\.cpp\.in$') do if [ -e "$file" ] then echo "$format $file" "$format" -i -style=file "$file" fi done hipFFT-rocm-5.7.1/.github/000077500000000000000000000000001445203054200151415ustar00rootroot00000000000000hipFFT-rocm-5.7.1/.github/CONTRIBUTING.md000066400000000000000000000031411445203054200173710ustar00rootroot00000000000000 ## Contribution License Agreement 1. The code I am contributing is mine, and I have the right to license it. 2. By submitting a pull request for this project I am granting you a license to distribute said code under the MIT License for the project. ## How to contribute Our code contriubtion guidelines closely follows the model of [GitHub pull-requests](https://help.github.com/articles/using-pull-requests/). 
This repository follows the [git flow](http://nvie.com/posts/a-successful-git-branching-model/) workflow, which dictates a /master branch where releases are cut, and a /develop branch which serves as an integration branch for new code. * A [git extension](https://github.com/nvie/gitflow) has been developed to ease the use of the 'git flow' methodology, but requires manual installation by the user. Refer to the projects wiki ## Pull-request guidelines * target the **develop** branch for integration * ensure code builds successfully. * do not break existing test cases * new functionality will only be merged with new unit tests * new unit tests should integrate within the existing [googletest framework](https://github.com/google/googletest/blob/master/googletest/docs/Primer.md) * tests must have good code coverage * code must also have benchmark tests, and performance must approach the compute bound limit or memory bound limit. ## Interface * All public APIs are C89 compatible; all other library code should use c++14 * Our minimum supported compiler is clang 3.6 * Avoid CamelCase * This rule applies specifically to publicly visible APIs, but is also encouraged (not mandated) for internal code hipFFT-rocm-5.7.1/.github/ISSUE_TEMPLATE.md000066400000000000000000000004611445203054200176470ustar00rootroot00000000000000### What is the expected behavior - ### What actually happens - ### How to reproduce - ### Environment | Hardware | description | |-----|-----| | GPU | device string | | CPU | device string | | Software | version | |-----|-----| | ROCK | v0.0 | | ROCR | v0.0 | | HCC | v0.0 | | Library | v0.0 | hipFFT-rocm-5.7.1/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000000701445203054200207370ustar00rootroot00000000000000resolves #___ Summary of proposed changes: - - - hipFFT-rocm-5.7.1/.github/dependabot.yml000066400000000000000000000010421445203054200177660ustar00rootroot00000000000000# To get started with Dependabot version updates, you'll need to specify 
which # package ecosystems to update and where the package manifests are located. # Please see the documentation for all configuration options: # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates version: 2 updates: - package-ecosystem: "pip" # See documentation for possible values directory: "/docs/.sphinx" # Location of package manifests open-pull-requests-limit: 10 schedule: interval: "daily" hipFFT-rocm-5.7.1/.gitignore000066400000000000000000000006761445203054200156020ustar00rootroot00000000000000# Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app # vim tags tags .tags .*.swp # Visual Studio Code .vscode # documentation artifacts build/ _build/ _images/ _static/ _templates/ _toc.yml docBin/ docs/.doxygen/hipfft.h # python bytecode __pycache__ hipFFT-rocm-5.7.1/.gitmodules000066400000000000000000000001461445203054200157570ustar00rootroot00000000000000[submodule "rocFFT"] path = clients/rocFFT url = https://github.com/ROCmSoftwarePlatform/rocFFT.git hipFFT-rocm-5.7.1/.jenkins/000077500000000000000000000000001445203054200153205ustar00rootroot00000000000000hipFFT-rocm-5.7.1/.jenkins/common.groovy000066400000000000000000000070051445203054200200610ustar00rootroot00000000000000import static groovy.io.FileType.FILES def runCompileCommand(platform, project, jobName, boolean sameOrg = false) { project.paths.construct_build_prefix() def getDependenciesCommand = "" if (project.installLibraryDependenciesFromCI) { project.libraryDependencies.each { libraryName -> getDependenciesCommand += auxiliary.getLibrary(libraryName, platform.jenkinsLabel, null, sameOrg) } } String cmake = platform.jenkinsLabel.contains('centos') ? "cmake3" : "cmake" String hipClang = platform.jenkinsLabel.contains('hipClang') ? 
"HIP_COMPILER=clang" : "" String path = platform.jenkinsLabel.contains('centos7') ? "export PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH" : ":" String dir = jobName.contains('Debug') ? "debug" : "release" // hipcc with CUDA backend needs HIP_PLATFORM set accordingly in the environment String hipPlatformCommand = platform.jenkinsLabel.contains("cuda") ? "export HIP_PLATFORM=nvidia" : "" def command = """#!/usr/bin/env bash set -x ls /fftw/lib export FFTW_ROOT=/fftw export FFTW_INCLUDE_PATH=\${FFTW_ROOT}/include export FFTW_LIB_PATH=\${FFTW_ROOT}/lib export LD_LIBRARY_PATH=\${FFTW_LIB_PATH}:/opt/rocm/lib:/opt/rocm/hip/lib export CPLUS_INCLUDE_PATH=\${FFTW_INCLUDE_PATH}:\${CPLUS_INCLUDE_PATH} export CMAKE_PREFIX_PATH=\${FFTW_LIB_PATH}/cmake/fftw3:\${CMAKE_PREFIX_PATH} export CMAKE_PREFIX_PATH=\${FFTW_LIB_PATH}/cmake/fftw3f:\${CMAKE_PREFIX_PATH} # default container flags cause problems for CUDA backend, and aren't useful for ROCm unset HIPCC_COMPILE_FLAGS_APPEND unset HIPCC_LINK_FLAGS_APPEND ${hipPlatformCommand} cd ${project.paths.project_build_prefix} mkdir -p build/${dir} && cd build/${dir} ${getDependenciesCommand} ${path} ${hipClang} ${cmake} ${project.paths.build_command} make -j\$(nproc) """ platform.runCommand(this, command) } def runTestCommand (platform, project, gfilter) { String cudaArgs = platform.jenkinsLabel.contains('cuda') ? '--double_epsilon=5e-11' : '--precompile=rocfft-test-precompile.db' def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix}/build/release/clients/staging GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipfft-test ${cudaArgs} --gtest_output=xml --gtest_color=yes --gtest_filter=${gfilter} """ platform.runCommand(this, command) junit "${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" } def runPackageCommand(platform, project, jobName, label='') { def command label = label != '' ? '-' + label.toLowerCase() : '' String ext = platform.jenkinsLabel.contains('ubuntu') ? 
"deb" : "rpm" String dir = jobName.contains('Debug') ? "debug" : "release" command = """ set -x cd ${project.paths.project_build_prefix}/build/${dir} make package mkdir -p package for f in hipfft*.$ext do mv "\$f" "hipfft${label}-\${f#*-}" done mv *.${ext} package/ """ platform.runCommand(this, command) platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/build/${dir}/package/*.${ext}""") } return this hipFFT-rocm-5.7.1/.jenkins/debug.groovy000066400000000000000000000052541445203054200176630ustar00rootroot00000000000000#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand -> def prj = new rocProject('hipFFT-internal', 'Debug') // customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocRAND','rocFFT-internal'] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> project.paths.construct_build_prefix() commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, null, null) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])]] propertyList = auxiliary.appendPropertyList(propertyList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } def hostJobNameList = ["compute-rocm-dkms-no-npi":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] def hipClangJobNameList = ["compute-rocm-dkms-no-npi":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] String hostBuildCommand = '-DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -L ../..' String hipClangBuildCommand = '-DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -DCMAKE_BUILD_TYPE=Debug -DBUILD_CLIENTS_TESTS=ON -DBUILD_CLIENTS_SAMPLES=ON -DBUILD_CLIENTS_SAMPLES=ON -L ../..' 
setupCI(urlJobName, hostJobNameList, hostBuildCommand, runCI, 'g++') setupCI(urlJobName, hipClangJobNameList, hipClangBuildCommand, runCI, 'hip-clang') } hipFFT-rocm-5.7.1/.jenkins/precheckin-cuda.groovy000066400000000000000000000076111445203054200216210ustar00rootroot00000000000000#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label, runTest -> def prj = new rocProject('hipFFT-internal', 'PreCheckin-Cuda') // customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['hipRAND'] prj.timeout.test = 600 // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> project.paths.construct_build_prefix() commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName, true) } def testCommand = { platform, project-> def gfilter = '-*swap*' commonGroovy.runTestCommand(platform, project, gfilter) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, runTest ? testCommand : null, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label, runTest) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label, runTest) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI(['ubuntu20-cuda11':['anycuda']], urlJobName, buildCommand, label, runTest) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = [] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = [:] propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String compilerVar = ' -DCMAKE_CXX_COMPILER=' String gBuildCommand = ' -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DBUILD_WITH_LIB=CUDA -DHIP_INCLUDE_DIRS=/opt/rocm/hip/include \ -DCMAKE_MODULE_PATH="/opt/rocm/lib/cmake/hip;/opt/rocm/hip/cmake;/opt/rocm/share/rocm/cmake" \ -L ../..' String boostLibraryDir = ' -DBOOST_LIBRARYDIR=/usr/lib/x86_64-linux-gnu' // Run tests on normal g++ build setupCI(urlJobName, jobNameList, compilerVar + 'g++' + gBuildCommand, runCI, 'g++', false) // Also build with hipcc+CUDA backend, both shared and static lib. // Static build allows the hipFFT callback sample to be built. // Skip tests since the first build would have already run tests. String hBuildCommand = ' -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DBUILD_WITH_LIB=CUDA -DHIP_INCLUDE_DIRS=/opt/rocm/hip/include \ -DCMAKE_MODULE_PATH="/opt/rocm/lib/cmake/hip;/opt/rocm/hip/cmake;/opt/rocm/share/rocm/cmake" \ -DCMAKE_CXX_FLAGS="-gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86" \ -DBUILD_CLIENTS=ON -L ../..' 
setupCI(urlJobName, jobNameList, compilerVar + 'hipcc' + hBuildCommand + boostLibraryDir, runCI, 'hipcc', true) setupCI(urlJobName, jobNameList, compilerVar + 'hipcc' + hBuildCommand + boostLibraryDir + ' -DBUILD_SHARED_LIBS=OFF', runCI, 'hipcc-static', false) } hipFFT-rocm-5.7.1/.jenkins/precheckin.groovy000066400000000000000000000060311445203054200207020ustar00rootroot00000000000000#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label, runTest -> def prj = new rocProject('hipFFT-internal', 'PreCheckin') // customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocRAND','rocFFT-internal'] prj.timeout.test = 360 // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> project.paths.construct_build_prefix() commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project,jobName) } def testCommand = { platform, project-> def gfilter = "-*len_768_*:*len_2880_*" commonGroovy.runTestCommand(platform, project, gfilter) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, runTest ? testCommand : null, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label, runTest) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label, runTest) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = '-DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -L ../..' String hipClangBuildCommand = '-DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_CLIENTS_TESTS=ON -DBUILD_CLIENTS_SAMPLES=ON -L ../..' 
setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++', false) setupCI(urlJobName, jobNameList, hipClangBuildCommand, runCI, 'hip-clang', true) } hipFFT-rocm-5.7.1/.jenkins/staticanalysis.groovy000066400000000000000000000016271445203054200216300ustar00rootroot00000000000000#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label -> def prj = new rocProject('hipFFT-internal', 'PreCheckin') // customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocRAND','rocFFT-internal'] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = true boolean staticAnalysis = true buildProject(prj, formatCheck, nodes.dockerArray, null, null, null, staticAnalysis) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 7')])])) stage(urlJobName) { runCI([ubuntu20:['any']], urlJobName) } } hipFFT-rocm-5.7.1/.jenkins/staticlibrary.groovy000066400000000000000000000057421445203054200214530ustar00rootroot00000000000000#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label -> def prj = new rocProject('hipFFT-internal', 'StaticLibrary') // customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['rocRAND','rocFFT-internal'] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> project.paths.construct_build_prefix() commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" 
commonGroovy.runCompileCommand(platform, project, jobName, true) } def testCommand = { platform, project-> def gfilter = "*" commonGroovy.runTestCommand(platform, project, gfilter) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu16:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])]] propertyList = auxiliary.appendPropertyList(propertyList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } def hostJobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900']])] def hipClangJobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900']])] String hostBuildCommand = '-DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=OFF -L ../..' String hipClangBuildCommand = '-DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_CLIENTS_TESTS=ON -DBUILD_CLIENTS_SAMPLES=ON -DBUILD_SHARED_LIBS=OFF -L ../..' 
setupCI(urlJobName, hostJobNameList, hostBuildCommand, runCI, 'g++') setupCI(urlJobName, hipClangJobNameList, hipClangBuildCommand, runCI, 'hip-clang') } hipFFT-rocm-5.7.1/.readthedocs.yaml000066400000000000000000000004171445203054200170320ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/conf.py formats: [htmlzip] python: version: "3.8" install: - requirements: docs/.sphinx/requirements.txt hipFFT-rocm-5.7.1/CHANGELOG.md000066400000000000000000000060021445203054200154100ustar00rootroot00000000000000# Change log for hipFFT Partial documentation for hipFFT is available at [hipFFT]. ## hipFFT 1.0.12 for ROCm 5.6.0 ### Added - Implemented the hipfftXtMakePlanMany, hipfftXtGetSizeMany, hipfftXtExec APIs, to allow requesting half-precision transforms. ### Changed - Added --precision argument to benchmark/test clients. --double is still accepted but is deprecated as a method to request a double-precision transform. ## hipFFT 1.0.11 for ROCm 5.5.0 ### Fixed - Fixed old version rocm include/lib folders not removed on upgrade. ## hipFFT 1.0.10 for ROCm 5.4.0 ### Added - Added hipfftExtPlanScaleFactor API to efficiently multiply each output element of a FFT by a given scaling factor. Result scaling must be supported in the backend FFT library. ### Changed - When hipFFT is built against the rocFFT backend, rocFFT 1.0.19 or higher is now required. - Data is initialized directly on GPUs using hipRAND. - Updated build files to use standard C++17. ## hipFFT 1.0.9 for ROCm 5.3.0 ### Changed - Clean up build warnings. - GNUInstall Dir enhancements. - Requires gtest 1.11. ## hipFFT 1.0.8 for ROCm 5.2.0 ### Added - Added File/Folder Reorg Changes with backward compatibility support using ROCM-CMAKE wrapper functions. - Packages for test and benchmark executables on all supported OSes using CPack. - Implemented hipfftMakePlanMany64 and hipfftGetSizeMany64. 
## hipFFT 1.0.7 for ROCm 5.1.0 ### Changed - Use fft_params struct for accuracy and benchmark clients. ## hipFFT 1.0.6 for ROCm 5.0.0 ### Fixed - Fixed incorrect reporting of rocFFT version. ### Changed - Unconditionally enabled callback functionality. On the CUDA backend, callbacks only run correctly when hipFFT is built as a static library, and is linked against the static cuFFT library. ## hipFFT 1.0.5 for ROCm 4.5.0 ### Added - Added support for Windows 10 as a build target. ### Changed - Packaging split into a runtime package called hipfft and a development package called hipfft-devel. The development package depends on runtime. The runtime package suggests the development package for all supported OSes except CentOS 7 to aid in the transition. The suggests feature in packaging is introduced as a deprecated feature and will be removed in a future rocm release. ## hipFFT 1.0.4 for ROCm 4.4.0 ### Fixed - Add calls to rocFFT setup/cleanup. - Cmake fixes for clients and backend support. ### Added - Added support for Windows 10 as a build target. ## hipFFT 1.0.3 for ROCm 4.3.0 ### Fixed - Cmake updates. ### Added - Added callback API in hipfftXt.h header. ## hipFFT 1.0.2 for ROCm 4.2.0 - No changes. ## hipFFT 1.0.1 for ROCm 4.1.0 ### Fixed - Fix batch support for `hipfftMakePlanMany`. - Fix work area handling during plan creation and `hipfftSetWorkArea`. - Honour `autoAllocate` flag. ### Changed - Testing infrastructure reuses code from [rocFFT]. [rocFFT]: https://github.com/ROCmSoftwarePlatform/rocFFT [hipFFT]: https://github.com/ROCmSoftwarePlatform/hipFFT [hipfft.readthedocs.io]: https://rocfft.readthedocs.io/en/latest/ hipFFT-rocm-5.7.1/CMakeLists.txt000066400000000000000000000230301445203054200163370ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # ############################################################################ # CMake version according to latest ROCm platform requirements cmake_minimum_required( VERSION 3.16 ) # We use C++17 features, this will add compile option: -std=c++17 set( CMAKE_CXX_STANDARD 17 ) set(CMAKE_CXX_EXTENSIONS OFF) # Consider removing this in the future # This should appear before the project command, because it does not use FORCE if( WIN32 ) set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" ) else() set( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories" ) endif() # Workarounds.. 
list( APPEND CMAKE_PREFIX_PATH /opt/rocm/llvm /opt/rocm ) list( APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip /opt/rocm/lib/cmake/hip /opt/rocm/hip/cmake ) # This has to be initialized before the project() command appears # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. # MSVC_IDE does not use CMAKE_BUILD_TYPE if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." ) endif() project( hipfft LANGUAGES CXX ) # Build options option( BUILD_SHARED_LIBS "Build ${PROJECT_NAME} as a shared library" ON ) option( BUILD_VERBOSE "Output additional build information" OFF ) set( BUILD_WITH_COMPILER "HOST-default" CACHE INTERNAL "Build ${PROJECT_NAME} with compiler HIP-clang, HIP-nvcc, or just the host default compiler, eg g++") set( BUILD_WITH_LIB "ROCM" CACHE STRING "Build ${PROJECT_NAME} with ROCM or CUDA libraries" ) option( BUILD_CLIENTS "Build all clients" OFF) option( BUILD_CLIENTS_RIDER "Build benchmark client" OFF ) option( BUILD_CLIENTS_TESTS "Build ${PROJECT_NAME} tests (requires 3rd dependencies)" OFF ) option( BUILD_CLIENTS_SAMPLES "Build examples" OFF ) option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF) option( WERROR "Treat warnings as errors" OFF ) # Set internal BUILD_WITH_COMPILER. if( NOT CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) set( BUILD_WITH_COMPILER "HOST-default" ) else() if( $ENV{HIP_PLATFORM} MATCHES "nvidia" ) set( BUILD_WITH_COMPILER "HIP-nvcc" ) else() set( BUILD_WITH_COMPILER "HIP-clang" ) if( NOT BUILD_WITH_LIB STREQUAL "ROCM" ) message( FATAL_ERROR "Detected HIP_COMPILER=clang, but BUILD_WITH_LIB is not ROCM!" 
) endif() endif() endif() string( TOUPPER "${BUILD_WITH_COMPILER}" BUILD_WITH_COMPILER ) string( TOUPPER "${BUILD_WITH_LIB}" BUILD_WITH_LIB ) set( WARNING_FLAGS -Wall -Wno-unused-function -Wimplicit-fallthrough -Wunreachable-code -Wno-unknown-pragmas) if( WERROR ) set( WARNING_FLAGS ${WARNING_FLAGS} -Werror ) endif() if (BUILD_WITH_COMPILER STREQUAL "HIP-NVCC" ) set (BUILD_WITH_LIB "CUDA") set( HIP_PLATFORM "nvidia" ) set( CMAKE_CXX_EXTENSIONS OFF ) set( CMAKE_CXX_COMPILE_OPTIONS_PIC "-Xcompiler=${CMAKE_CXX_COMPILE_OPTIONS_PIC}" ) set( CMAKE_SHARED_LIBRARY_C_FLAGS "-Xlinker=${CMAKE_SHARED_LIBRARY_C_FLAGS}" ) set( CMAKE_SHARED_LIBRARY_CXX_FLAGS "-Xlinker=${CMAKE_SHARED_LIBRARY_CXX_FLAGS}" ) set( CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-Xlinker=-soname," ) set( CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG "-Xlinker=-soname," ) set( CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG "-Xlinker=-rpath," ) set( CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG "-Xlinker=-rpath," ) set( CMAKE_EXECUTABLE_RUNTIME_C_FLAG "-Xlinker=-rpath," ) set( CMAKE_EXECUTABLE_RUNTIME_CXX_FLAG "-Xlinker=-rpath," ) set( CMAKE_C_COMPILE_OPTIONS_VISIBILITY "-Xcompiler='${CMAKE_C_COMPILE_OPTIONS_VISIBILITY}'" ) set( CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY "-Xcompiler='${CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY}'" ) set( CMAKE_C_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN "-Xcompiler='${CMAKE_C_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN}'" ) set( CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN "-Xcompiler='${CMAKE_CXX_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN}'" ) foreach( FLAG IN ITEMS ${WARNING_FLAGS} ) set( NVCC_WARNING_FLAGS ${NVCC_WARNING_FLAGS} "-Xcompiler=${FLAG}" ) endforeach() set( WARNING_FLAGS ${NVCC_WARNING_FLAGS} ) else() # Define GPU targets set( AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102 CACHE STRING "List of specific machine types for library to target" ) if( BUILD_WITH_COMPILER STREQUAL "HIP-CLANG" ) set( HIP_PLATFORM "amd" ) set( HIP_COMPILER "clang" ) 
if(BUILD_ADDRESS_SANITIZER) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") add_link_options(-fuse-ld=lld) endif() endif() endif() # Show the actual compiler(internal option) message(STATUS "BUILD_WITH_COMPILER = " ${BUILD_WITH_COMPILER}) # Dependencies include(cmake/dependencies.cmake) # FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" ON) if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32) rocm_wrap_header_dir( ${CMAKE_SOURCE_DIR}/library/include PATTERNS "*.h" GUARDS SYMLINK WRAPPER WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR} ) endif() # Version set( VERSION_STRING "1.0.12" ) set( hipfft_SOVERSION 0.1 ) if( ROCM_FOUND ) rocm_setup_version( VERSION ${VERSION_STRING} ) endif() add_subdirectory( library ) # Build clients of the library if( BUILD_CLIENTS ) set( BUILD_CLIENTS_RIDER ON ) set( BUILD_CLIENTS_SAMPLES ON ) set( BUILD_CLIENTS_TESTS ON ) endif() # Build clients of the library if( BUILD_CLIENTS_RIDER OR BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS ) include( clients/cmake/build-options.cmake ) rocm_package_setup_component(clients) if(BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_RIDER) find_package( Boost COMPONENTS program_options REQUIRED) set(BOOST_DEB "libboost-program-options${Boost_VERSION_MAJOR}.${Boost_VERSION_MINOR}.${Boost_VERSION_PATCH}") set(BOOST_RPM "boost-program-options = ${Boost_VERSION_MAJOR}.${Boost_VERSION_MINOR}.${Boost_VERSION_PATCH}") endif() if(NOT CLIENTS_OS) rocm_set_os_id(CLIENTS_OS) string(TOLOWER "${CLIENTS_OS}" CLIENTS_OS) rocm_read_os_release(CLIENTS_OS_VERSION VERSION_ID) endif() message(STATUS "OS: ${CLIENTS_OS} ${CLIENTS_OS_VERSION}") set(FFTW_DEB "libfftw3-bin") if(CLIENTS_OS STREQUAL "sles") set(BOOST_RPM RPM 
"libboost_program_options${Boost_VERSION_MAJOR}_${Boost_VERSION_MINOR}_${Boost_VERSION_PATCH}") set(FFTW_RPM "libfftw3-3") else() set(FFTW_RPM "fftw-libs") endif() if(BUILD_CLIENTS_TESTS) rocm_package_setup_client_component( tests DEPENDS DEB ${BOOST_DEB} ${FFTW_DEB} rocrand RPM ${BOOST_RPM} ${FFTW_RPM} rocrand ) endif() if(BUILD_CLIENTS_RIDER) rocm_package_setup_client_component( benchmarks DEPENDS DEB ${BOOST_DEB} rocrand RPM ${BOOST_RPM} rocrand ) endif() add_subdirectory( clients ) endif() # Packaging... if(WIN32) set(CPACK_SOURCE_GENERATOR "ZIP") set(CPACK_GENERATOR "ZIP") set(CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE) set(INSTALL_PREFIX "C:/hipSDK") set(CPACK_SET_DESTDIR OFF) set(CPACK_PACKAGE_INSTALL_DIRECTORY "C:/hipSDK") set(CPACK_PACKAGING_INSTALL_PREFIX "") set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF) endif() if( ROCM_FOUND ) # Package specific CPACK vars if( NOT BUILD_WITH_LIB STREQUAL "CUDA" ) rocm_package_add_dependencies(DEPENDS "rocfft >= 1.0.19") else() rocm_package_add_dependencies(DEPENDS "cufft >= 10.0.0") endif() set( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" ) set( CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" ) # Give hipfft compiled for CUDA backend a different name if( BUILD_WITH_LIB STREQUAL "ROCM" ) set( package_name hipfft ) else() set( package_name hipfft-alt ) endif() set( HIPFFT_CONFIG_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" CACHE PATH "Path placed into ldconfig file" ) rocm_create_package( NAME ${package_name} DESCRIPTION "ROCm FFT marshalling library" MAINTAINER "hipfft-maintainer@amd.com" LDCONFIG LDCONFIG_DIR ${HIPFFT_CONFIG_DIR} ) endif() hipFFT-rocm-5.7.1/CppCheckSuppressions.txt000066400000000000000000000001531445203054200204570ustar00rootroot00000000000000// has some false positives and isn't hard to run manually for periodic // dead code sweeps unusedFunction 
hipFFT-rocm-5.7.1/LICENSE.md000066400000000000000000000021311445203054200152020ustar00rootroot00000000000000MIT License Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. hipFFT-rocm-5.7.1/README.md000066400000000000000000000071421445203054200150640ustar00rootroot00000000000000# hipFFT hipFFT is an FFT marshalling library. Currently, hipFFT supports either [rocFFT] or [cuFFT] as backends. hipFFT exports an interface that does not require the client to change, regardless of the chosen backend. It sits between the application and the backend FFT library, marshalling inputs into the backend and results back to the application. [rocFFT]: https://github.com/ROCmSoftwarePlatform/rocFFT [cuFFT]: https://developer.nvidia.com/cufft ## Documentation Run the steps below to build documentation locally. ``` cd docs pip3 install -r .sphinx/requirements.txt python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . 
_build/html ``` ## Installing pre-built packages Download pre-built packages from [ROCm's package servers]. * On Ubuntu: `sudo apt update && sudo apt install hipfft` [ROCm's package servers]: https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html ## Transitioning from rocFFT If you are transitioning from the hipFFT version included in rocFFT to this standalone hipFFT version, please modify your build following this example: * previously: `hipcc hipfft_1d_z2z.cpp -L/opt/rocm/lib -lrocfft` * during transition: `hipcc -I/opt/rocm/hipfft/include hipfft_1d_z2z.cpp -L/opt/rocm/lib -lhipfft -lrocfft` ## Building from source ### Library build dependencies To build the hipFFT library: * hipFFT depends on [rocFFT] on AMD platforms; * hipFFT depends on [cuFFT] on Nvidia platforms. ### Client build dependencies * The clients (samples, tests, etc.) included with the hipFFT source depend on FFTW, gtest, and boost program-options. * The rider and test clients also require the rocFFT source tree to build: git submodule update --init ### Building hipFFT To show all build options: mkdir build && cd build cmake -LH .. Here are some CMake build examples: | Hardware target | Case | Build command line | | --- | --- | --- | | AMD GPU | Build a project using HIP language APIs + hipFFT with standard host compiler | cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -L .. | | AMD GPU | Build a project using HIP language APIs + hipFFT + device kernels with HIP-clang | cmake -DCMAKE_CXX_COMPILER=hipcc -DCMAKE_BUILD_TYPE=Release -DBUILD_CLIENTS=ON -L .. | | NVIDIA GPU | Build a project using HIP language APIs + hipFFT with standard host compiler | cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DBUILD_WITH_LIB=CUDA -L .. | | NVIDIA GPU | Build a project using HIP language APIs + hipFFT + device kernels with HIP-nvcc | HIP_PLATFORM=nvidia cmake -DCMAKE_CXX_COMPILER=hipcc -DCMAKE_BUILD_TYPE=Release -DBUILD_CLIENTS=ON -L .. 
| Note that the option -DBUILD_CLIENTS=ON is only allowed for the hipcc compiler. ## Quick CUDA porting guide If you have existing CUDA code and want to transition to HIP: * [HIPIFY] your code and fix all unsupported CUDA features or user-defined macros. * Build with HIP-nvcc to run on an Nvidia device. * Build with HIP-clang to run on an AMD device. More information about porting to HIP is available on the [HIP porting guide]. [HIPIFY]: https://github.com/ROCm-Developer-Tools/HIPIFY [HIP porting guide]: https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP-porting-guide.html hipFFT-rocm-5.7.1/clients/000077500000000000000000000000001445203054200152425ustar00rootroot00000000000000hipFFT-rocm-5.7.1/clients/CMakeLists.txt000066400000000000000000000120351445203054200200030ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2020 - 2023 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # ############################################################################# # CMake version according to latest ROCm platform requirements cmake_minimum_required( VERSION 3.16 ) # We use C++17 features, this will add compile option: -std=c++17 set( CMAKE_CXX_STANDARD 17 ) set(CMAKE_CXX_EXTENSIONS OFF) # Consider removing this in the future # This should appear before the project command, because it does not use FORCE if( WIN32 ) set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" ) else( ) set( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories" ) endif( ) # This has to be initialized before the project() command appears # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does # not use CMAKE_BUILD_TYPE if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." ) endif() # This project may compile dependencies for clients project( hipfft-clients LANGUAGES CXX ) list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ) include( build-options ) if( NOT CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) if(BUILD_CLIENTS) message( FATAL_ERROR "Using BUILD_CLIENTS=ON requires hipcc or Clang++, as client programs require device code to be built." 
) endif() endif() if( GIT_FOUND AND EXISTS "${CMAKE_SOURCE_DIR}/.git" ) message(STATUS "rocFFT submodule update") execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/clients/rocFFT RESULT_VARIABLE GIT_SUBMOD_RESULT) if( NOT GIT_SUBMOD_RESULT EQUAL "0" ) message(FATAL_ERROR "git submodule update --init --recursive failed with ${GIT_SUBMOD_RESULT}, please checkout submodules manually.") endif( ) endif( ) if( NOT EXISTS "${CMAKE_SOURCE_DIR}/clients/rocFFT/CMakeLists.txt" ) message(FATAL_ERROR "The rocFFT submodule is not present! Please update git submodules and try again. ${CMAKE_CURRENT_SOURCE_DIR}/clients/rocFFT/CMakeLists.txt") endif( ) # This option only works for make/nmake and the ninja generators, but no reason it shouldn't be on # all the time # This tells cmake to create a compile_commands.json file that can be used with clang tooling or vim set( CMAKE_EXPORT_COMPILE_COMMANDS ON ) # if hipfft is not a target, then we know clients are built separately from the library and we must # search for the hipfft package if( NOT TARGET hipfft ) find_package( hipfft REQUIRED CONFIG PATHS ) endif( ) if( BUILD_CLIENTS_SAMPLES ) add_subdirectory( samples ) endif( ) if( BUILD_CLIENTS_TESTS ) find_package( GTest 1.11.0 ) include( ExternalProject ) if( NOT GTEST_FOUND ) set( GTEST_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/src/gtest/googletest/include ) set( GTEST_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/src/gtest-build/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_CURRENT_BINARY_DIR}/src/gtest-build/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX} ) ExternalProject_Add( gtest URL https://github.com/google/googletest/archive/release-1.11.0.tar.gz URL_HASH SHA256=b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5 PREFIX ${CMAKE_CURRENT_BINARY_DIR} CMAKE_ARGS -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DBUILD_SHARED_LIBS=OFF 
INSTALL_COMMAND "" BUILD_BYPRODUCTS ${GTEST_LIBRARIES} ) ExternalProject_Get_Property( gtest source_dir binary_dir ) endif() add_subdirectory( tests ) endif( ) if( BUILD_CLIENTS_RIDER ) add_subdirectory( rider ) endif( ) hipFFT-rocm-5.7.1/clients/cmake/000077500000000000000000000000001445203054200163225ustar00rootroot00000000000000hipFFT-rocm-5.7.1/clients/cmake/FindFFTW.cmake000066400000000000000000000105451445203054200207000ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
# ############################################################################# #if( FFTW_FIND_VERSION VERSION_LESS "3" ) # message( FFTW_FIND_VERION is ${FFTW_FIND_VERSION}) # message( FATAL_ERROR "FindFFTW can not configure versions less than FFTW 3.0.0" ) #endif( ) find_path(FFTW_INCLUDE_DIRS NAMES fftw3.h HINTS ${FFTW_ROOT}/include $ENV{FFTW_ROOT}/include PATHS /usr/include /usr/local/include ) mark_as_advanced( FFTW_INCLUDE_DIRS ) # message( STATUS "FFTW_FIND_COMPONENTS: ${FFTW_FIND_COMPONENTS}" ) # message( STATUS "FFTW_FIND_REQUIRED_FLOAT: ${FFTW_FIND_REQUIRED_FLOAT}" ) # message( STATUS "FFTW_FIND_REQUIRED_DOUBLE: ${FFTW_FIND_REQUIRED_DOUBLE}" ) set( FFTW_LIBRARIES "" ) if( FFTW_FIND_REQUIRED_FLOAT OR FFTW_FIND_REQUIRED_SINGLE ) find_library( FFTW_LIBRARIES_SINGLE NAMES fftw3f fftw3f-3 fftw3 fftw3-3 HINTS ${FFTW_ROOT}/lib $ENV{FFTW_ROOT}/lib PATHS /usr/lib /usr/local/lib PATH_SUFFIXES x86_64-linux-gnu DOC "FFTW dynamic library single" ) mark_as_advanced( FFTW_LIBRARIES_SINGLE ) list( APPEND FFTW_LIBRARIES ${FFTW_LIBRARIES_SINGLE} ) # Look for omp (preferred) or thread libraries. These are not a # hard requirement, but are nice to have to make FFTW run faster. find_library( FFTWF_OMP_LIBRARY fftw3f_omp ) find_library( FFTWF_THREADS_LIBRARY fftw3f_threads ) if( FFTWF_OMP_LIBRARY ) list( APPEND FFTW_LIBRARIES ${FFTWF_OMP_LIBRARY} ) set( FFTW_MULTITHREAD TRUE ) elseif( FFTWF_THREADS_LIBRARY ) list( APPEND FFTW_LIBRARIES ${FFTWF_THREADS_LIBRARY} ) set( FFTW_MULTITHREAD TRUE ) endif() endif( ) if( FFTW_FIND_REQUIRED_DOUBLE ) find_library( FFTW_LIBRARIES_DOUBLE NAMES fftw3 HINTS ${FFTW_ROOT}/lib $ENV{FFTW_ROOT}/lib PATHS /usr/lib /usr/local/lib PATH_SUFFIXES x86_64-linux-gnu DOC "FFTW dynamic library double" ) mark_as_advanced( FFTW_LIBRARIES_DOUBLE ) list( APPEND FFTW_LIBRARIES ${FFTW_LIBRARIES_DOUBLE} ) # Look for omp (preferred) or thread libraries. These are not a # hard requirement, but are nice to have to make FFTW run faster. 
find_library( FFTW_OMP_LIBRARY fftw3_omp ) find_library( FFTW_THREADS_LIBRARY fftw3_threads ) if( FFTW_OMP_LIBRARY ) list( APPEND FFTW_LIBRARIES ${FFTW_OMP_LIBRARY} ) set( FFTW_MULTITHREAD TRUE ) elseif( FFTW_THREADS_LIBRARY ) list( APPEND FFTW_LIBRARIES ${FFTW_THREADS_LIBRARY} ) set( FFTW_MULTITHREAD TRUE ) endif() endif( ) include( FindPackageHandleStandardArgs ) FIND_PACKAGE_HANDLE_STANDARD_ARGS( FFTW REQUIRED_VARS FFTW_INCLUDE_DIRS FFTW_LIBRARIES ) # assume the threads feature is always enabled on Windows, since it's # not a separate library there if( FFTW_FOUND AND WIN32 ) set( FFTW_MULTITHREAD TRUE ) endif() if( NOT FFTW_FOUND ) message( STATUS "FindFFTW could not find all of the following fftw libraries" ) message( STATUS "${FFTW_FIND_COMPONENTS}" ) else( ) message(STATUS "FindFFTW configured variables:" ) message(STATUS "FFTW_INCLUDE_DIRS: ${FFTW_INCLUDE_DIRS}" ) message(STATUS "FFTW_LIBRARIES: ${FFTW_LIBRARIES}" ) endif() hipFFT-rocm-5.7.1/clients/cmake/build-options.cmake000066400000000000000000000036751445203054200221270ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # ############################################################################# # This file is intended to be used in two ways; independently in a stand alone PROJECT # and as part of a superbuild. If the file is included in a stand alone project, the # variables are not expected to be preset, and this will produce options() in the GUI # for the user to examine. If this file is included in a superbuild, the options will be # presented in the superbuild GUI, but then passed into the ExternalProject as -D # parameters, which would already define them. if( NOT BUILD_CLIENTS_TESTS ) option( BUILD_CLIENTS_TESTS "Build hipFFT unit tests" OFF ) endif( ) if( NOT BUILD_CLIENTS_SAMPLES ) option( BUILD_CLIENTS_SAMPLES "Build hipFFT samples" OFF ) endif( ) hipFFT-rocm-5.7.1/clients/hipfft_params.h000066400000000000000000000634401445203054200202450ustar00rootroot00000000000000// Copyright (C) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#ifndef HIPFFT_PARAMS_H #define HIPFFT_PARAMS_H #include #include "hipfft.h" #include "hipfftXt.h" #include "rocFFT/shared/fft_params.h" inline fft_status fft_status_from_hipfftparams(const hipfftResult_t val) { switch(val) { case HIPFFT_SUCCESS: return fft_status_success; case HIPFFT_INVALID_PLAN: case HIPFFT_ALLOC_FAILED: return fft_status_failure; case HIPFFT_INVALID_TYPE: case HIPFFT_INVALID_VALUE: case HIPFFT_INVALID_SIZE: case HIPFFT_INCOMPLETE_PARAMETER_LIST: case HIPFFT_INVALID_DEVICE: case HIPFFT_NOT_IMPLEMENTED: case HIPFFT_NOT_SUPPORTED: return fft_status_invalid_arg_value; case HIPFFT_INTERNAL_ERROR: case HIPFFT_EXEC_FAILED: case HIPFFT_SETUP_FAILED: case HIPFFT_UNALIGNED_DATA: case HIPFFT_PARSE_ERROR: return fft_status_failure; case HIPFFT_NO_WORKSPACE: return fft_status_invalid_work_buffer; default: return fft_status_failure; } } inline std::string hipfftResult_string(const hipfftResult_t val) { switch(val) { case HIPFFT_SUCCESS: return "HIPFFT_SUCCESS (0)"; case HIPFFT_INVALID_PLAN: return "HIPFFT_INVALID_PLAN (1)"; case HIPFFT_ALLOC_FAILED: return "HIPFFT_ALLOC_FAILED (2)"; case HIPFFT_INVALID_TYPE: return "HIPFFT_INVALID_TYPE (3)"; case HIPFFT_INVALID_VALUE: return "HIPFFT_INVALID_VALUE (4)"; case HIPFFT_INTERNAL_ERROR: return "HIPFFT_INTERNAL_ERROR (5)"; case HIPFFT_EXEC_FAILED: return "HIPFFT_EXEC_FAILED (6)"; case HIPFFT_SETUP_FAILED: return "HIPFFT_SETUP_FAILED (7)"; case HIPFFT_INVALID_SIZE: return "HIPFFT_INVALID_SIZE (8)"; case HIPFFT_UNALIGNED_DATA: return "HIPFFT_UNALIGNED_DATA (9)"; case HIPFFT_INCOMPLETE_PARAMETER_LIST: return "HIPFFT_INCOMPLETE_PARAMETER_LIST (10)"; case HIPFFT_INVALID_DEVICE: return "HIPFFT_INVALID_DEVICE (11)"; case HIPFFT_PARSE_ERROR: return "HIPFFT_PARSE_ERROR (12)"; case HIPFFT_NO_WORKSPACE: return "HIPFFT_NO_WORKSPACE (13)"; case HIPFFT_NOT_IMPLEMENTED: return "HIPFFT_NOT_IMPLEMENTED (14)"; case HIPFFT_NOT_SUPPORTED: return "HIPFFT_NOT_SUPPORTED (16)"; default: return "invalid hipfftResult"; } } class 
hipfft_params : public fft_params { public: // plan handles are pointers for rocFFT backend, and ints for cuFFT #ifdef __HIP_PLATFORM_AMD__ static constexpr hipfftHandle INVALID_PLAN_HANDLE = nullptr; #else static constexpr hipfftHandle INVALID_PLAN_HANDLE = -1; #endif hipfftHandle plan = INVALID_PLAN_HANDLE; // hipFFT has two ways to specify transform type - the hipfftType // enum, and separate hipDataType enums for input/output. // hipfftType has no way to express an fp16 transform, so // hipfft_transform_type will not be set in that case. std::optional hipfft_transform_type; hipDataType inputType = HIP_C_32F; hipDataType outputType = HIP_C_32F; int direction; std::vector int_length; std::vector int_inembed; std::vector int_onembed; std::vector ll_length; std::vector ll_inembed; std::vector ll_onembed; hipfft_params(){}; hipfft_params(const fft_params& p) : fft_params(p){}; ~hipfft_params() { free(); }; void free() { if(plan != INVALID_PLAN_HANDLE) { hipfftDestroy(plan); plan = INVALID_PLAN_HANDLE; } } size_t vram_footprint() override { size_t val = fft_params::vram_footprint(); if(setup_structs() != fft_status_success) { throw std::runtime_error("Struct setup failed"); } workbuffersize = 0; // Hack for estimating buffer requirements. 
workbuffersize = 3 * val; val += workbuffersize; return val; } fft_status setup_structs() { // set direction switch(transform_type) { case fft_transform_type_complex_forward: case fft_transform_type_real_forward: direction = HIPFFT_FORWARD; break; case fft_transform_type_complex_inverse: case fft_transform_type_real_inverse: direction = HIPFFT_BACKWARD; break; } // set i/o types and transform type switch(transform_type) { case fft_transform_type_complex_forward: case fft_transform_type_complex_inverse: { switch(precision) { case fft_precision_half: inputType = HIP_C_16F; outputType = HIP_C_16F; hipfft_transform_type.reset(); break; case fft_precision_single: inputType = HIP_C_32F; outputType = HIP_C_32F; hipfft_transform_type = HIPFFT_C2C; break; case fft_precision_double: inputType = HIP_C_64F; outputType = HIP_C_64F; hipfft_transform_type = HIPFFT_Z2Z; break; } break; } case fft_transform_type_real_forward: { switch(precision) { case fft_precision_half: inputType = HIP_R_16F; outputType = HIP_C_16F; hipfft_transform_type.reset(); break; case fft_precision_single: inputType = HIP_R_32F; outputType = HIP_C_32F; hipfft_transform_type = HIPFFT_R2C; break; case fft_precision_double: inputType = HIP_R_64F; outputType = HIP_C_64F; hipfft_transform_type = HIPFFT_D2Z; break; } break; } case fft_transform_type_real_inverse: { switch(precision) { case fft_precision_half: inputType = HIP_C_16F; outputType = HIP_R_16F; hipfft_transform_type.reset(); break; case fft_precision_single: inputType = HIP_C_32F; outputType = HIP_R_32F; hipfft_transform_type = HIPFFT_C2R; break; case fft_precision_double: inputType = HIP_C_64F; outputType = HIP_R_64F; hipfft_transform_type = HIPFFT_Z2D; break; } break; } default: throw std::runtime_error("Invalid transform type"); } int_length.resize(dim()); int_inembed.resize(dim()); int_onembed.resize(dim()); ll_length.resize(dim()); ll_inembed.resize(dim()); ll_onembed.resize(dim()); switch(dim()) { case 3: ll_inembed[2] = istride[1] / istride[2]; 
ll_onembed[2] = ostride[1] / ostride[2]; [[fallthrough]]; case 2: ll_inembed[1] = istride[0] / istride[1]; ll_onembed[1] = ostride[0] / ostride[1]; [[fallthrough]]; case 1: ll_inembed[0] = istride[dim() - 1]; ll_onembed[0] = ostride[dim() - 1]; break; default: throw std::runtime_error("Invalid dimension"); } for(size_t i = 0; i < dim(); ++i) { ll_length[i] = length[i]; int_length[i] = length[i]; int_inembed[i] = ll_inembed[i]; int_onembed[i] = ll_onembed[i]; } hipfftResult ret = HIPFFT_SUCCESS; return fft_status_from_hipfftparams(ret); } fft_status create_plan() override { auto fft_ret = setup_structs(); if(fft_ret != fft_status_success) { return fft_ret; } hipfftResult ret{HIPFFT_INTERNAL_ERROR}; switch(get_create_type()) { case PLAN_Nd: { ret = create_plan_Nd(); break; } case PLAN_MANY: { ret = create_plan_many(); break; } case CREATE_MAKE_PLAN_Nd: { ret = create_make_plan_Nd(); break; } case CREATE_MAKE_PLAN_MANY: { ret = create_make_plan_many(); break; } case CREATE_MAKE_PLAN_MANY64: { ret = create_make_plan_many64(); break; } case CREATE_XT_MAKE_PLAN_MANY: { ret = create_xt_make_plan_many(); break; } default: { throw std::runtime_error("no valid plan creation type"); } } return fft_status_from_hipfftparams(ret); } fft_status set_callbacks(void* load_cb_host, void* load_cb_data, void* store_cb_host, void* store_cb_data) override { if(run_callbacks) { if(!hipfft_transform_type) throw std::runtime_error("callbacks require a valid hipfftType"); hipfftResult ret{HIPFFT_EXEC_FAILED}; switch(*hipfft_transform_type) { case HIPFFT_R2C: ret = hipfftXtSetCallback(plan, &load_cb_host, HIPFFT_CB_LD_REAL, &load_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); ret = hipfftXtSetCallback( plan, &store_cb_host, HIPFFT_CB_ST_COMPLEX, &store_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); break; case HIPFFT_D2Z: ret = hipfftXtSetCallback( plan, &load_cb_host, HIPFFT_CB_LD_REAL_DOUBLE, &load_cb_data); if(ret != 
HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); ret = hipfftXtSetCallback( plan, &store_cb_host, HIPFFT_CB_ST_COMPLEX_DOUBLE, &store_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); break; case HIPFFT_C2R: ret = hipfftXtSetCallback(plan, &load_cb_host, HIPFFT_CB_LD_COMPLEX, &load_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); ret = hipfftXtSetCallback(plan, &store_cb_host, HIPFFT_CB_ST_REAL, &store_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); break; case HIPFFT_Z2D: ret = hipfftXtSetCallback( plan, &load_cb_host, HIPFFT_CB_LD_COMPLEX_DOUBLE, &load_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); ret = hipfftXtSetCallback( plan, &store_cb_host, HIPFFT_CB_ST_REAL_DOUBLE, &store_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); break; case HIPFFT_C2C: ret = hipfftXtSetCallback(plan, &load_cb_host, HIPFFT_CB_LD_COMPLEX, &load_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); ret = hipfftXtSetCallback( plan, &store_cb_host, HIPFFT_CB_ST_COMPLEX, &store_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); break; case HIPFFT_Z2Z: ret = hipfftXtSetCallback( plan, &load_cb_host, HIPFFT_CB_LD_COMPLEX_DOUBLE, &load_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); ret = hipfftXtSetCallback( plan, &store_cb_host, HIPFFT_CB_ST_COMPLEX_DOUBLE, &store_cb_data); if(ret != HIPFFT_SUCCESS) return fft_status_from_hipfftparams(ret); break; default: throw std::runtime_error("Invalid execution type"); } } return fft_status_success; } virtual fft_status execute(void** in, void** out) override { return execute(in[0], out[0]); }; fft_status execute(void* ibuffer, void* obuffer) { hipfftResult ret{HIPFFT_EXEC_FAILED}; // we have two ways to execute in hipFFT - hipfftExecFOO and // hipfftXtExec // Transforms that aren't supported by the hipfftType enum // 
require using the Xt method, but otherwise we hash the // token to decide how to execute this FFT. we want test // cases to rotate between different execution APIs, but we also // need the choice of API to be stable across reruns of the // same test cases. if(!hipfft_transform_type || std::hash()(token()) % 2) { ret = hipfftXtExec(plan, ibuffer, obuffer, direction); } else { try { switch(*hipfft_transform_type) { case HIPFFT_R2C: ret = hipfftExecR2C( plan, (hipfftReal*)ibuffer, (hipfftComplex*)(placement == fft_placement_inplace ? ibuffer : obuffer)); break; case HIPFFT_D2Z: ret = hipfftExecD2Z(plan, (hipfftDoubleReal*)ibuffer, (hipfftDoubleComplex*)(placement == fft_placement_inplace ? ibuffer : obuffer)); break; case HIPFFT_C2R: ret = hipfftExecC2R( plan, (hipfftComplex*)ibuffer, (hipfftReal*)(placement == fft_placement_inplace ? ibuffer : obuffer)); break; case HIPFFT_Z2D: ret = hipfftExecZ2D(plan, (hipfftDoubleComplex*)ibuffer, (hipfftDoubleReal*)(placement == fft_placement_inplace ? ibuffer : obuffer)); break; case HIPFFT_C2C: ret = hipfftExecC2C( plan, (hipfftComplex*)ibuffer, (hipfftComplex*)(placement == fft_placement_inplace ? ibuffer : obuffer), direction); break; case HIPFFT_Z2Z: ret = hipfftExecZ2Z(plan, (hipfftDoubleComplex*)ibuffer, (hipfftDoubleComplex*)(placement == fft_placement_inplace ? ibuffer : obuffer), direction); break; default: throw std::runtime_error("Invalid execution type"); } } catch(const std::exception& e) { std::cerr << e.what() << std::endl; } catch(...) 
{ std::cerr << "unknown exception in execute(void* ibuffer, void* obuffer)" << std::endl; } } return fft_status_from_hipfftparams(ret); } bool is_contiguous() const { // compute contiguous stride, dist and check that the actual // strides/dists match std::vector contiguous_istride = compute_stride(ilength(), {}, placement == fft_placement_inplace && transform_type == fft_transform_type_real_forward); std::vector contiguous_ostride = compute_stride(olength(), {}, placement == fft_placement_inplace && transform_type == fft_transform_type_real_inverse); if(istride != contiguous_istride || ostride != contiguous_ostride) return false; return compute_idist() == idist && compute_odist() == odist; } private: // hipFFT provides multiple ways to create FFT plans: // - hipfftPlan1d/2d/3d (combined allocate + init for specific dim) // - hipfftPlanMany (combined allocate + init with dim as param) // - hipfftCreate + hipfftMakePlan1d/2d/3d (separate alloc + init for specific dim) // - hipfftCreate + hipfftMakePlanMany (separate alloc + init with dim as param) // - hipfftCreate + hipfftMakePlanMany64 (separate alloc + init with dim as param, 64-bit) // - hipfftCreate + hipfftXtMakePlanMany (separate alloc + init with separate i/o/exec types) // // Rotate through the choices for better test coverage. enum PlanCreateAPI { PLAN_Nd, PLAN_MANY, CREATE_MAKE_PLAN_Nd, CREATE_MAKE_PLAN_MANY, CREATE_MAKE_PLAN_MANY64, CREATE_XT_MAKE_PLAN_MANY, }; // Not all plan options work with all creation types. Return a // suitable plan creation type for the current FFT parameters. 
int get_create_type() { bool contiguous = is_contiguous(); bool batched = nbatch > 1; std::vector allowed_apis; // half-precision requires XtMakePlanMany if(precision == fft_precision_half) { allowed_apis.push_back(CREATE_XT_MAKE_PLAN_MANY); } else { // separate alloc + init "Many" APIs are always allowed allowed_apis.push_back(CREATE_MAKE_PLAN_MANY); allowed_apis.push_back(CREATE_MAKE_PLAN_MANY64); allowed_apis.push_back(CREATE_XT_MAKE_PLAN_MANY); // combined PlanMany API can't do scaling if(scale_factor == 1.0) allowed_apis.push_back(PLAN_MANY); // non-many APIs are only allowed if FFT is contiguous, and // only the 1D API allows for batched FFTs. if(contiguous && (!batched || dim() == 1)) { // combined Nd API can't do scaling if(scale_factor == 1.0) allowed_apis.push_back(PLAN_Nd); allowed_apis.push_back(CREATE_MAKE_PLAN_Nd); } } // hash the token to decide how to create this FFT. we want // test cases to rotate between different create APIs, but we // also need the choice of API to be stable across reruns of // the same test cases. 
return allowed_apis[std::hash()(token()) % allowed_apis.size()]; } // call hipfftPlan* functions hipfftResult_t create_plan_Nd() { auto ret = HIPFFT_INVALID_PLAN; switch(dim()) { case 1: ret = hipfftPlan1d(&plan, int_length[0], *hipfft_transform_type, nbatch); break; case 2: ret = hipfftPlan2d(&plan, int_length[0], int_length[1], *hipfft_transform_type); break; case 3: ret = hipfftPlan3d( &plan, int_length[0], int_length[1], int_length[2], *hipfft_transform_type); break; default: throw std::runtime_error("invalid dim"); } return ret; } hipfftResult_t create_plan_many() { auto ret = hipfftPlanMany(&plan, dim(), int_length.data(), int_inembed.data(), istride.back(), idist, int_onembed.data(), ostride.back(), odist, *hipfft_transform_type, nbatch); return ret; } // call hipfftCreate + hipfftMake* functions hipfftResult_t create_with_scale_factor() { auto ret = hipfftCreate(&plan); if(ret != HIPFFT_SUCCESS) return ret; if(scale_factor != 1.0) { ret = hipfftExtPlanScaleFactor(plan, scale_factor); if(ret != HIPFFT_SUCCESS) return ret; } return ret; } hipfftResult_t create_make_plan_Nd() { auto ret = create_with_scale_factor(); if(ret != HIPFFT_SUCCESS) return ret; switch(dim()) { case 1: return hipfftMakePlan1d( plan, int_length[0], *hipfft_transform_type, nbatch, &workbuffersize); case 2: return hipfftMakePlan2d( plan, int_length[0], int_length[1], *hipfft_transform_type, &workbuffersize); case 3: return hipfftMakePlan3d(plan, int_length[0], int_length[1], int_length[2], *hipfft_transform_type, &workbuffersize); default: throw std::runtime_error("invalid dim"); } } hipfftResult_t create_make_plan_many() { auto ret = create_with_scale_factor(); if(ret != HIPFFT_SUCCESS) return ret; return hipfftMakePlanMany(plan, dim(), int_length.data(), int_inembed.data(), istride.back(), idist, int_onembed.data(), ostride.back(), odist, *hipfft_transform_type, nbatch, &workbuffersize); } hipfftResult_t create_make_plan_many64() { auto ret = create_with_scale_factor(); if(ret != 
HIPFFT_SUCCESS) return ret; return hipfftMakePlanMany64(plan, dim(), ll_length.data(), ll_inembed.data(), istride.back(), idist, ll_onembed.data(), ostride.back(), odist, *hipfft_transform_type, nbatch, &workbuffersize); } hipfftResult_t create_xt_make_plan_many() { auto ret = create_with_scale_factor(); if(ret != HIPFFT_SUCCESS) return ret; // execution type is always complex, matching the precision // of the transform // Initializing as double by default hipDataType executionType = HIP_C_64F; switch(precision) { case fft_precision_half: executionType = HIP_C_16F; break; case fft_precision_single: executionType = HIP_C_32F; break; case fft_precision_double: executionType = HIP_C_64F; break; } return hipfftXtMakePlanMany(plan, dim(), ll_length.data(), ll_inembed.data(), istride.back(), idist, inputType, ll_onembed.data(), ostride.back(), odist, outputType, nbatch, &workbuffersize, executionType); } }; #endif hipFFT-rocm-5.7.1/clients/rider/000077500000000000000000000000001445203054200163475ustar00rootroot00000000000000hipFFT-rocm-5.7.1/clients/rider/CMakeLists.txt000066400000000000000000000072201445203054200211100ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2020 - 2023 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # ############################################################################# CMAKE_MINIMUM_REQUIRED(VERSION 3.16) find_package( Boost COMPONENTS program_options REQUIRED) set( Boost_USE_STATIC_LIBS OFF ) set( hipfft_rider_source rider.cpp ../rocFFT/shared/array_validator.cpp ) set( hipfft_rider_includes rider.h ../rocFFT/shared/array_validator.h ) add_executable( hipfft-rider ${hipfft_rider_source} ${hipfft_rider_includes} ) target_compile_options( hipfft-rider PRIVATE ${WARNING_FLAGS} ) set_target_properties( hipfft-rider PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON ) target_include_directories( hipfft-rider PRIVATE $ $ $ $ $ ) if( NOT CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) if( NOT BUILD_WITH_LIB STREQUAL "CUDA" ) if( WIN32 ) find_package( HIP CONFIG REQUIRED ) else() find_package( HIP MODULE REQUIRED ) endif() target_link_libraries( hipfft-rider PRIVATE hip::host hip::device ) else() target_compile_definitions( hipfft-rider PRIVATE __HIP_PLATFORM_NVIDIA__) target_include_directories( hipfft-rider PRIVATE ${HIP_INCLUDE_DIRS}) endif() else() if( BUILD_WITH_LIB STREQUAL "CUDA" AND DEFINED boost_program_options_VERSION ) # NVCC doesn't like linking with files that don't end in .so, so # we add a hack to remove the version number as the suffix. 
string(REGEX REPLACE \.${boost_program_options_VERSION} "" Boost_PROGRAM_OPTIONS_LIBRARY_RELEASE ${Boost_PROGRAM_OPTIONS_LIBRARY_RELEASE}) endif() endif() if ( BUILD_WITH_LIB STREQUAL "CUDA" ) target_compile_options( hipfft-rider PRIVATE -arch sm_53 -gencode=arch=compute_53,code=sm_53 -Xptxas=-w) target_link_libraries( hipfft-rider PRIVATE ${CUDA_LIBRARIES} ) else() if( NOT hiprand_FOUND ) find_package( hiprand REQUIRED ) endif() target_link_libraries( hipfft-rider PRIVATE hip::hiprand ) endif() target_link_libraries( hipfft-rider PRIVATE hip::hipfft ${Boost_PROGRAM_OPTIONS_LIBRARY_RELEASE} ) set_target_properties( hipfft-rider PROPERTIES DEBUG_POSTFIX "-d" CXX_EXTENSIONS NO ) set_target_properties( hipfft-rider PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) rocm_install(TARGETS hipfft-rider COMPONENT benchmarks) hipFFT-rocm-5.7.1/clients/rider/rider.cpp000066400000000000000000000316511445203054200201660ustar00rootroot00000000000000// Copyright (C) 2016 - 2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include #include #include #include #include #include #include "rider.h" #include namespace po = boost::program_options; #include "../rocFFT/shared/gpubuf.h" int main(int argc, char* argv[]) { // This helps with mixing output of both wide and narrow characters to the screen std::ios::sync_with_stdio(false); // Control output verbosity: int verbose{}; // hip Device number for running tests: int deviceId{}; // Number of performance trial samples int ntrial{}; // FFT parameters: hipfft_params params; // Token string to fully specify fft params. std::string token; // Declare the supported options. // clang-format doesn't handle boost program options very well: // clang-format off po::options_description opdesc("hipfft rider command line options"); opdesc.add_options()("help,h", "produces this help message") ("version,v", "Print queryable version information from the hipfft library") ("device", po::value(&deviceId)->default_value(0), "Select a specific device id") ("verbose", po::value(&verbose)->default_value(0), "Control output verbosity") ("ntrial,N", po::value(&ntrial)->default_value(1), "Trial size for the problem") ("notInPlace,o", "Not in-place FFT transform (default: in-place)") ("double", "Double precision transform (deprecated: use --precision double)") ("precision", po::value(¶ms.precision), "Transform precision: single (default), double, half") ("transformType,t", po::value(¶ms.transform_type) ->default_value(fft_transform_type_complex_forward), "Type of transform:\n0) complex forward\n1) complex inverse\n2) real " "forward\n3) real inverse") ( "batchSize,b", po::value(¶ms.nbatch)->default_value(1), "If this value is greater than one, arrays will be used ") ( "itype", po::value(¶ms.itype) 
->default_value(fft_array_type_unset), "Array type of input data:\n0) interleaved\n1) planar\n2) real\n3) " "hermitian interleaved\n4) hermitian planar") ( "otype", po::value(¶ms.otype) ->default_value(fft_array_type_unset), "Array type of output data:\n0) interleaved\n1) planar\n2) real\n3) " "hermitian interleaved\n4) hermitian planar") ("length", po::value>(¶ms.length)->multitoken(), "Lengths.") ("istride", po::value>(¶ms.istride)->multitoken(), "Input strides.") ("ostride", po::value>(¶ms.ostride)->multitoken(), "Output strides.") ("idist", po::value(¶ms.idist)->default_value(0), "Logical distance between input batches.") ("odist", po::value(¶ms.odist)->default_value(0), "Logical distance between output batches.") ("isize", po::value>(¶ms.isize)->multitoken(), "Logical size of input buffer.") ("osize", po::value>(¶ms.osize)->multitoken(), "Logical size of output buffer.") ("ioffset", po::value>(¶ms.ioffset)->multitoken(), "Input offsets.") ("ooffset", po::value>(¶ms.ooffset)->multitoken(), "Output offsets.") ("scalefactor", po::value(¶ms.scale_factor), "Scale factor to apply to output.") ("token", po::value(&token)); // clang-format on po::variables_map vm; po::store(po::parse_command_line(argc, argv, opdesc), vm); po::notify(vm); if(vm.count("help")) { std::cout << opdesc << std::endl; return 0; } // if(vm.count("version")) // { // char v[256]; // rocfft_get_version_string(v, 256); // std::cout << "version " << v << std::endl; // return 0; // } if(!vm.count("length")) { std::cout << "Please specify transform length!" << std::endl; std::cout << opdesc << std::endl; return 0; } if(vm.count("ntrial")) { std::cout << "Running profile with " << ntrial << " samples\n"; } if(token != "") { std::cout << "Reading fft params from token:\n" << token << std::endl; try { params.from_token(token); } catch(...) { std::cout << "Unable to parse token." << std::endl; return 1; } } else { if(!vm.count("length")) { std::cout << "Please specify transform length!" 
<< std::endl; std::cout << opdesc << std::endl; return 0; } params.placement = vm.count("notInPlace") ? fft_placement_notinplace : fft_placement_inplace; if(vm.count("double")) params.precision = fft_precision_double; if(vm.count("notInPlace")) { std::cout << "out-of-place\n"; } else { std::cout << "in-place\n"; } if(vm.count("length")) { std::cout << "length:"; for(auto& i : params.length) std::cout << " " << i; std::cout << "\n"; } if(vm.count("istride")) { std::cout << "istride:"; for(auto& i : params.istride) std::cout << " " << i; std::cout << "\n"; } if(vm.count("ostride")) { std::cout << "ostride:"; for(auto& i : params.ostride) std::cout << " " << i; std::cout << "\n"; } if(params.idist > 0) { std::cout << "idist: " << params.idist << "\n"; } if(params.odist > 0) { std::cout << "odist: " << params.odist << "\n"; } if(vm.count("ioffset")) { std::cout << "ioffset:"; for(auto& i : params.ioffset) std::cout << " " << i; std::cout << "\n"; } if(vm.count("ooffset")) { std::cout << "ooffset:"; for(auto& i : params.ooffset) std::cout << " " << i; std::cout << "\n"; } } std::cout << std::flush; // Fixme: set the device id properly after the IDs are synced // bewteen hip runtime and rocm-smi. 
// HIP_V_THROW(hipSetDevice(deviceId), "set device failed!"); params.validate(); if(!params.valid(verbose)) { throw std::runtime_error("Invalid parameters, add --verbose=1 for detail"); } std::cout << "Token: " << params.token() << std::endl; if(verbose) { std::cout << params.str() << std::endl; std::cout << "Token: " << params.token() << std::endl; } // Check free and total available memory: size_t free = 0; size_t total = 0; HIP_V_THROW(hipMemGetInfo(&free, &total), "hipMemGetInfo failed"); const auto raw_vram_footprint = params.fft_params_vram_footprint() + twiddle_table_vram_footprint(params); if(!vram_fits_problem(raw_vram_footprint, free)) { std::cout << "SKIPPED: Problem size (" << raw_vram_footprint << ") raw data too large for device.\n"; return EXIT_SUCCESS; } const auto vram_footprint = params.vram_footprint(); if(!vram_fits_problem(vram_footprint, free)) { std::cout << "SKIPPED: Problem size (" << vram_footprint << ") raw data too large for device.\n"; return EXIT_SUCCESS; } // Create plans: auto ret = params.create_plan(); if(ret != fft_status_success) throw std::runtime_error("Plan creation failed"); hipError_t hip_rt; // GPU input buffer: auto ibuffer_sizes = params.ibuffer_sizes(); std::vector ibuffer(ibuffer_sizes.size()); std::vector pibuffer(ibuffer_sizes.size()); for(unsigned int i = 0; i < ibuffer.size(); ++i) { hip_rt = ibuffer[i].alloc(ibuffer_sizes[i]); if(hip_rt != hipSuccess) throw std::runtime_error("Creating input Buffer failed"); pibuffer[i] = ibuffer[i].data(); } // Input data: params.compute_input(ibuffer); if(verbose > 1) { // Copy input to CPU auto cpu_input = allocate_host_buffer(params.precision, params.itype, params.isize); for(unsigned int idx = 0; idx < ibuffer.size(); ++idx) { hip_rt = hipMemcpy(cpu_input.at(idx).data(), ibuffer[idx].data(), ibuffer_sizes[idx], hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); } std::cout << "GPU input:\n"; params.print_ibuffer(cpu_input); } // GPU 
output buffer: std::vector obuffer_data; std::vector* obuffer = &obuffer_data; if(params.placement == fft_placement_inplace) { obuffer = &ibuffer; } else { auto obuffer_sizes = params.obuffer_sizes(); obuffer_data.resize(obuffer_sizes.size()); for(unsigned int i = 0; i < obuffer_data.size(); ++i) { hip_rt = obuffer_data[i].alloc(obuffer_sizes[i]); if(hip_rt != hipSuccess) throw std::runtime_error("Creating output Buffer failed"); } } std::vector pobuffer(obuffer->size()); for(unsigned int i = 0; i < obuffer->size(); ++i) { pobuffer[i] = obuffer->at(i).data(); } auto res = params.execute(pibuffer.data(), pobuffer.data()); if(res != fft_status_success) throw std::runtime_error("Execution failed"); // Run the transform several times and record the execution time: std::vector gpu_time(ntrial); hipEvent_t start, stop; hip_rt = hipEventCreate(&start); if(hip_rt != hipSuccess) throw std::runtime_error("hipEventCreate failed"); hip_rt = hipEventCreate(&stop); if(hip_rt != hipSuccess) throw std::runtime_error("hipEventCreate failed"); for(size_t itrial = 0; itrial < gpu_time.size(); ++itrial) { params.compute_input(ibuffer); hip_rt = hipEventRecord(start); if(hip_rt != hipSuccess) throw std::runtime_error("hipEventRecord failed"); res = params.execute(pibuffer.data(), pobuffer.data()); hip_rt = hipEventRecord(stop); if(hip_rt != hipSuccess) throw std::runtime_error("hipEventRecord failed"); hip_rt = hipEventSynchronize(stop); if(hip_rt != hipSuccess) throw std::runtime_error("hipEventSynchronize failed"); if(res != fft_status_success) throw std::runtime_error("Execution failed"); float time; hip_rt = hipEventElapsedTime(&time, start, stop); if(hip_rt != hipSuccess) throw std::runtime_error("hipEventElapsedTime failed"); gpu_time[itrial] = time; if(verbose > 2) { auto output = allocate_host_buffer(params.precision, params.otype, params.osize); for(unsigned int idx = 0; idx < output.size(); ++idx) { hip_rt = hipMemcpy( output[idx].data(), pobuffer[idx], output[idx].size(), 
hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); } std::cout << "GPU output:\n"; params.print_obuffer(output); } } std::cout << "\nExecution gpu time:"; for(const auto& i : gpu_time) { std::cout << " " << i; } std::cout << " ms" << std::endl; } hipFFT-rocm-5.7.1/clients/rider/rider.h000066400000000000000000000054671445203054200176410ustar00rootroot00000000000000// Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #ifndef RIDER_H #define RIDER_H #include "../hipfft_params.h" #include "hipfft.h" #include // This is used to either wrap a HIP function call, or to explicitly check a variable // for an error condition. If an error occurs, we throw. 
// Note: std::runtime_error does not take unicode strings as input, so only strings // supported inline hipError_t hip_V_Throw(hipError_t res, const std::string& msg, size_t lineno, const std::string& fileName) { if(res != hipSuccess) { std::stringstream tmp; tmp << "HIP_V_THROWERROR< "; tmp << res; tmp << " > ("; tmp << fileName; tmp << " Line: "; tmp << lineno; tmp << "): "; tmp << msg; std::string errorm(tmp.str()); std::cout << errorm << std::endl; throw std::runtime_error(errorm); } return res; } inline hipfftResult lib_V_Throw(hipfftResult res, const std::string& msg, size_t lineno, const std::string& fileName) { if(res != HIPFFT_SUCCESS) { std::stringstream tmp; tmp << "LIB_V_THROWERROR< "; tmp << res; tmp << " > ("; tmp << fileName; tmp << " Line: "; tmp << lineno; tmp << "): "; tmp << msg; std::string errorm(tmp.str()); std::cout << errorm << std::endl; throw std::runtime_error(errorm); } return res; } #define HIP_V_THROW(_status, _message) hip_V_Throw(_status, _message, __LINE__, __FILE__) #define LIB_V_THROW(_status, _message) lib_V_Throw(_status, _message, __LINE__, __FILE__) #endif // RIDER_H hipFFT-rocm-5.7.1/clients/rocFFT/000077500000000000000000000000001445203054200163655ustar00rootroot00000000000000hipFFT-rocm-5.7.1/clients/samples/000077500000000000000000000000001445203054200167065ustar00rootroot00000000000000hipFFT-rocm-5.7.1/clients/samples/CMakeLists.txt000066400000000000000000000100251445203054200214440ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2016 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
# ############################################################################# CMAKE_MINIMUM_REQUIRED( VERSION 3.16 ) # We use C++17 features, this will add compile option: -std=c++17 set( CMAKE_CXX_STANDARD 17 ) if( NOT TARGET hipfft ) find_package( hipfft REQUIRED CONFIG PATHS ) endif( ) set( sample_list hipfft_1d_z2z hipfft_1d_d2z hipfft_2d_z2z hipfft_2d_d2z hipfft_3d_z2z hipfft_3d_d2z hipfft_planmany_2d_z2z hipfft_planmany_2d_r2c hipfft_setworkarea ) # callback sample has its own HIP code, so it needs to be built with hipcc or clang++ if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) # on cuFFT backend, use of callbacks requires linking against the # static cuFFT library if( NOT (BUILD_WITH_LIB STREQUAL "CUDA") OR NOT BUILD_SHARED_LIBS ) list( APPEND sample_list hipfft_callback ) else() message( STATUS "hipfft_callback sample disabled on non-static CUDA build" ) endif() else() message( STATUS "hipfft_callback sample disabled, requires hipcc or Clang++ build" ) endif() foreach( sample ${sample_list} ) add_executable( ${sample} ${sample}.cpp ) set_target_properties( ${sample} PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON ) target_link_libraries( ${sample} PRIVATE hip::hipfft ) target_compile_options( ${sample} PRIVATE ${WARNING_FLAGS} ) if( NOT CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) if( WIN32 ) find_package( HIP CONFIG REQUIRED ) else() find_package( HIP MODULE REQUIRED ) endif() if( NOT BUILD_WITH_LIB STREQUAL "CUDA" ) target_link_libraries( ${sample} PRIVATE hip::host hip::device ) else() target_compile_definitions( ${sample} PRIVATE __HIP_PLATFORM_NVIDIA__) target_include_directories( ${sample} PRIVATE ${HIP_INCLUDE_DIRS}) endif() endif() if ( BUILD_WITH_LIB STREQUAL "CUDA" ) target_compile_options( ${sample} PRIVATE -arch sm_53 -gencode=arch=compute_53,code=sm_53 -Xptxas=-w) target_link_libraries( ${sample} PRIVATE ${CUDA_LIBRARIES} ) else() if( NOT hiprand_FOUND ) find_package( hiprand REQUIRED ) endif() 
target_link_libraries( ${sample} PRIVATE hip::hiprand ) endif() target_include_directories( ${sample} PRIVATE $ $ $ ${HIP_ROOT_DIR} ) set_target_properties( ${sample} PROPERTIES DEBUG_POSTFIX "-d" CXX_EXTENSIONS NO ) set_target_properties( ${sample} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) endforeach() # cuFFT callback code must be compiled with -dc to enable relocatable # device code if( BUILD_WITH_LIB STREQUAL "CUDA" AND hipfft_callback IN_LIST sample_list ) target_compile_options( hipfft_callback PRIVATE -dc ) endif() hipFFT-rocm-5.7.1/clients/samples/hipfft_1d_d2z.cpp000066400000000000000000000074041445203054200220420ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP #include "../hipfft_params.h" int main() { std::cout << "hipfft 1D double-precision real-to-complex transform\n"; const size_t Nx = 8; const size_t Ncomplex = Nx / 2 + 1; std::vector rdata(Nx); size_t real_bytes = sizeof(decltype(rdata)::value_type) * rdata.size(); std::vector> cdata(Ncomplex); size_t complex_bytes = sizeof(std::complex) * cdata.size(); // Create HIP device object double* x; hipError_t hip_rt; hip_rt = hipMalloc(&x, complex_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); // Inititalize the data for(size_t i = 0; i < Nx; i++) { rdata[i] = i; } std::cout << "input:\n"; for(size_t i = 0; i < rdata.size(); i++) { std::cout << rdata[i] << " "; } std::cout << std::endl; hip_rt = hipMemcpy(x, rdata.data(), real_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); // Create the plan hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; hipfftResult hipfft_rt = hipfftCreate(&plan); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to create plan"); hipfft_rt = hipfftPlan1d(&plan, // plan handle Nx, // transform length HIPFFT_D2Z, // transform type (HIPFFT_R2C for single-precision) 1); // number of transforms (deprecated) if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan1d failed"); // Execute plan: // hipfftExecD2Z: double precision, hipfftExecR2C: for single-precision // Direction is implied by real-to-complex direction hipfft_rt = hipfftExecD2Z(plan, x, (hipfftDoubleComplex*)x); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecD2Z failed"); std::cout << "output:\n"; hip_rt = hipMemcpy(cdata.data(), x, complex_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); for(size_t i = 0; i < cdata.size(); i++) { std::cout << 
cdata[i] << " "; } std::cout << std::endl; hipfftDestroy(plan); hip_rt = hipFree(x); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/samples/hipfft_1d_z2z.cpp000066400000000000000000000072161445203054200220710ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP #include "../hipfft_params.h" int main() { std::cout << "hipfft 1D double-precision complex-to-complex transform\n"; const int Nx = 8; int direction = HIPFFT_FORWARD; // forward=-1, backward=1 std::vector> cdata(Nx); size_t complex_bytes = sizeof(decltype(cdata)::value_type) * cdata.size(); // Create HIP device object and copy data to device // Use hipfftComplex for single-precision hipError_t hip_rt; hipfftDoubleComplex* x; hip_rt = hipMalloc(&x, complex_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); // Inititalize the data for(size_t i = 0; i < Nx; i++) { cdata[i] = i; } std::cout << "input:\n"; for(size_t i = 0; i < cdata.size(); i++) { std::cout << cdata[i] << " "; } std::cout << std::endl; hip_rt = hipMemcpy(x, cdata.data(), complex_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); // Create the plan hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; hipfftResult hipfft_rt = hipfftCreate(&plan); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to create plan"); hipfft_rt = hipfftPlan1d(&plan, // plan handle Nx, // transform length HIPFFT_Z2Z, // transform type (HIPFFT_C2C for single-precision) 1); // number of transforms if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan1d failed"); // Execute plan: // hipfftExecZ2Z: double precision, hipfftExecC2C: for single-precision hipfft_rt = hipfftExecZ2Z(plan, x, x, direction); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2Z failed"); std::cout << "output:\n"; hip_rt = hipMemcpy(cdata.data(), x, complex_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); for(size_t i = 0; i < cdata.size(); i++) { std::cout << cdata[i] << " "; } std::cout << std::endl; hipfftDestroy(plan); 
hip_rt = hipFree(x); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/samples/hipfft_2d_d2z.cpp000066400000000000000000000100171445203054200220350ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP #include "../hipfft_params.h" int main() { std::cout << "hipfft 2D double-precision real-to-complex transform\n"; const size_t Nx = 4; const size_t Ny = 5; std::cout << "Nx: " << Nx << "\tNy: " << Ny << std::endl; const size_t Nycomplex = Ny / 2 + 1; const size_t rstride = Nycomplex * 2; // Ny for out-of-place std::cout << "Input:\n"; std::vector rdata(Nx * rstride); for(size_t i = 0; i < Nx * rstride; i++) { rdata[i] = i; } for(size_t i = 0; i < Nx; i++) { for(size_t j = 0; j < Ny; j++) { auto pos = i * rstride + j; std::cout << rdata[pos] << " "; } std::cout << "\n"; } std::cout << std::endl; double* x; hipError_t hip_rt; hip_rt = hipMalloc(&x, rdata.size() * sizeof(decltype(rdata)::value_type)); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); hip_rt = hipMemcpy( x, rdata.data(), rdata.size() * sizeof(decltype(rdata)::value_type), hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); // Create plan: hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; hipfftResult hipfft_rt = hipfftCreate(&plan); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to create plan"); hipfft_rt = hipfftPlan2d(&plan, // plan handle Nx, // transform length Ny, // transform length HIPFFT_D2Z); // transform type (HIPFFT_R2C for single-precision) if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlandd failed"); // Execute plan: // hipfftExecD2Z: double precision. 
hipfftExecR2C: single-precision hipfft_rt = hipfftExecD2Z(plan, x, (hipfftDoubleComplex*)x); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecD2Z failed"); // Copy the output data to the host: std::vector> cdata(Nx * Nycomplex); hip_rt = hipMemcpy( cdata.data(), x, cdata.size() * sizeof(decltype(cdata)::value_type), hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); std::cout << "Output:\n"; for(size_t i = 0; i < Nx; i++) { for(size_t j = 0; j < Nycomplex; j++) { auto pos = i * Nycomplex + j; std::cout << cdata[pos] << " "; } std::cout << "\n"; } std::cout << std::endl; hipfftDestroy(plan); hip_rt = hipFree(x); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/samples/hipfft_2d_z2z.cpp000066400000000000000000000076151445203054200220750ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP #include "../hipfft_params.h" int main() { std::cout << "hipfft 2D double-precision complex-to-complex transform\n"; const int Nx = 4; const int Ny = 4; int direction = HIPFFT_FORWARD; // forward=-1, backward=1 std::vector> cdata(Nx * Ny); size_t complex_bytes = sizeof(decltype(cdata)::value_type) * cdata.size(); // Create HIP device object and copy data to device: // hipfftComplex for single-precision hipError_t hip_rt; hipfftDoubleComplex* x; hip_rt = hipMalloc(&x, complex_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); // Inititalize the data for(size_t i = 0; i < Nx * Ny; i++) { cdata[i] = i; } std::cout << "input:\n"; for(int i = 0; i < Nx; i++) { for(int j = 0; j < Ny; j++) { int pos = i * Ny + j; std::cout << cdata[pos] << " "; } std::cout << "\n"; } std::cout << std::endl; hip_rt = hipMemcpy(x, cdata.data(), complex_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); // Create plan hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; hipfftResult hipfft_rt = hipfftCreate(&plan); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to create plan"); hipfft_rt = hipfftPlan2d(&plan, // plan handle Nx, // transform length Ny, // transform length HIPFFT_Z2Z); // transform type (HIPFFT_C2C for single-precision) if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlandd failed"); // Execute plan // hipfftExecZ2Z: double precision, hipfftExecC2C: for single-precision hipfft_rt = hipfftExecZ2Z(plan, x, x, direction); if(hipfft_rt != 
HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2Z failed"); std::cout << "output:\n"; hip_rt = hipMemcpy(cdata.data(), x, complex_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); for(size_t i = 0; i < Nx; i++) { for(size_t j = 0; j < Ny; j++) { auto pos = i * Ny + j; std::cout << cdata[pos] << " "; } std::cout << "\n"; } std::cout << std::endl; hipfftDestroy(plan); hip_rt = hipFree(x); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/samples/hipfft_3d_d2z.cpp000066400000000000000000000104771445203054200220500ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP #include "../hipfft_params.h" int main() { std::cout << "hipfft 3D double-precision real-to-complex transform\n"; const size_t Nx = 4; const size_t Ny = 5; const size_t Nz = 6; std::cout << "Nx: " << Nx << "\tNy " << Ny << "\tNz " << Nz << std::endl; const size_t Nzcomplex = Nz / 2 + 1; const size_t rstride = Nzcomplex * 2; // Nz for out-of-place const size_t real_bytes = sizeof(double) * Nx * Ny * rstride; const size_t complex_bytes = 2 * sizeof(double) * Nx * Ny * Nzcomplex; double* x; hipError_t hip_rt; hip_rt = hipMalloc(&x, real_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); // Inititalize the data std::vector rdata(Nx * Ny * rstride); for(size_t i = 0; i < Nx * Ny * rstride; i++) { rdata[i] = i; } std::cout << "input:\n"; for(size_t i = 0; i < Nx; i++) { for(size_t j = 0; j < Ny; j++) { for(size_t k = 0; k < rstride; k++) { auto pos = (i * Ny + j) * rstride + k; std::cout << rdata[pos] << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << std::endl; hip_rt = hipMemcpy(x, rdata.data(), real_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); // Create plan: hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; hipfftResult hipfft_rt = hipfftCreate(&plan); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to create plan"); hipfft_rt = hipfftPlan3d(&plan, // plan handle Nx, Ny, Nz, // transform lengths HIPFFT_D2Z); // transform type (HIPFFT_R2C for single-precision) if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan3d failed"); // Execute plan: // hipfftExecD2Z: double precision, hipfftExecR2C: single-precision hipfft_rt = hipfftExecD2Z(plan, x, (hipfftDoubleComplex*)x); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecD2Z failed"); std::cout << "output:\n"; 
std::vector> cdata(Nx * Ny * Nz); hip_rt = hipMemcpy(cdata.data(), x, complex_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); for(size_t i = 0; i < Nx; i++) { for(size_t j = 0; j < Ny; j++) { for(size_t k = 0; k < Nzcomplex; k++) { auto pos = (i * Ny + j) * Nzcomplex + k; std::cout << cdata[pos] << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << std::endl; hipfftDestroy(plan); hip_rt = hipFree(x); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/samples/hipfft_3d_z2z.cpp000066400000000000000000000102621445203054200220660ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP #include "../hipfft_params.h" int main() { std::cout << "hipfft 3D double-precision complex-to-complex transform\n"; const int Nx = 4; const int Ny = 4; const int Nz = 4; int direction = HIPFFT_FORWARD; // forward=-1, backward=1 std::vector> cdata(Nx * Ny * Nz); size_t complex_bytes = sizeof(decltype(cdata)::value_type) * cdata.size(); // Create HIP device object and copy data to device: // hipfftComplex for single-precision hipError_t hip_rt; hipfftDoubleComplex* x; hip_rt = hipMalloc(&x, complex_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); std::cout << "Input:\n"; for(size_t i = 0; i < Nx * Ny * Nz; i++) { cdata[i] = i; } for(int i = 0; i < Nx; i++) { for(int j = 0; j < Ny; j++) { for(int k = 0; k < Nz; k++) { int pos = (i * Ny + j) * Nz + k; std::cout << cdata[pos] << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << std::endl; hip_rt = hipMemcpy(x, cdata.data(), complex_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); // Create plan hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; hipfftResult hipfft_rt = hipfftCreate(&plan); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to create plan"); hipfft_rt = hipfftPlan3d(&plan, // plan handle Nx, // transform length Ny, // transform length Nz, // transform length HIPFFT_Z2Z); // transform type (HIPFFT_C2C for single-precision) if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan3d failed"); // Execute plan // hipfftExecZ2Z: double precision, hipfftExecC2C: for single-precision hipfft_rt = hipfftExecZ2Z(plan, x, x, direction); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2Z failed"); std::cout << "output:\n"; hip_rt = hipMemcpy(cdata.data(), x, complex_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) 
throw std::runtime_error("hipMemcpy failed"); for(int i = 0; i < Nx; i++) { for(int j = 0; j < Ny; j++) { for(int k = 0; k < Nz; k++) { int pos = (i * Ny + j) * Nz + k; std::cout << cdata[pos] << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << std::endl; hipfftDestroy(plan); hip_rt = hipFree(x); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/samples/hipfft_callback.cpp000066400000000000000000000135031445203054200225100ustar00rootroot00000000000000// Copyright (C) 2021 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include #include struct load_cbdata { hipfftDoubleComplex* filter; double scale; }; __device__ hipfftDoubleComplex load_callback(hipfftDoubleComplex* input, size_t offset, void* cbdata, void* sharedMem) { auto data = static_cast(cbdata); // NB: for optimal performance, one may need a custom // multiplication operator. return hipCmul(hipCmul(input[offset], data->filter[offset]), make_hipDoubleComplex(data->scale, 0)); } __device__ auto load_callback_dev = load_callback; int main() { std::cout << "hipfft 1D double-precision complex-to-complex transform with callback\n"; const int Nx = 8; int direction = HIPFFT_FORWARD; // forward=-1, backward=1 std::vector cdata(Nx), filter(Nx); size_t complex_bytes = sizeof(decltype(cdata)::value_type) * cdata.size(); // Create HIP device object and copy data to device // Use hipfftComplex for single-precision hipError_t hip_rt; hipfftDoubleComplex *x, *filter_dev; hip_rt = hipMalloc(&x, complex_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); hip_rt = hipMalloc(&filter_dev, complex_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); // Initialize the data and filter for(size_t i = 0; i < Nx; i++) { cdata[i].x = i; cdata[i].y = i; filter[i].x = rand() / static_cast(RAND_MAX); filter[i].y = 0; } hip_rt = hipMemcpy(x, cdata.data(), complex_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); hip_rt = hipMemcpy(filter_dev, filter.data(), complex_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); std::cout << "input:\n"; for(size_t i = 0; i < cdata.size(); i++) { std::cout << "(" << cdata[i].x << ", " << cdata[i].y << ") "; } std::cout << std::endl; // Create the plan hipfftHandle plan = NULL; hipfftResult hipfft_rt = hipfftCreate(&plan); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to create plan"); hipfft_rt = hipfftPlan1d(&plan, // 
plan handle Nx, // transform length HIPFFT_Z2Z, // transform type (HIPFFT_C2C for single-precision) 1); // number of transforms if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan1d failed"); // prepare callback load_cbdata cbdata_host; cbdata_host.filter = filter_dev; cbdata_host.scale = 1.0 / static_cast(Nx); void* cbdata_dev; hip_rt = hipMalloc(&cbdata_dev, sizeof(load_cbdata)); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); hip_rt = hipMemcpy(cbdata_dev, &cbdata_host, sizeof(load_cbdata), hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); void* cbptr_host = nullptr; hip_rt = hipMemcpyFromSymbol(&cbptr_host, HIP_SYMBOL(load_callback_dev), sizeof(void*)); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpyFromSymbol failed"); // set callback hipfft_rt = hipfftXtSetCallback(plan, &cbptr_host, HIPFFT_CB_LD_COMPLEX_DOUBLE, &cbdata_dev); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftXtSetCallback failed"); // Execute plan: // hipfftExecZ2Z: double precision, hipfftExecC2C: for single-precision hipfft_rt = hipfftExecZ2Z(plan, x, x, direction); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2Z failed"); std::cout << "output:\n"; hip_rt = hipMemcpy(cdata.data(), x, complex_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); for(size_t i = 0; i < cdata.size(); i++) { std::cout << "(" << cdata[i].x << ", " << cdata[i].y << ") "; } std::cout << std::endl; hipfftDestroy(plan); hip_rt = hipFree(cbdata_dev); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); hip_rt = hipFree(filter_dev); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); hip_rt = hipFree(x); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } 
hipFFT-rocm-5.7.1/clients/samples/hipfft_planmany_2d_r2c.cpp000066400000000000000000000125641445203054200237340ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP int main() { std::cout << "hipfft 2D single-precision real-to-complex transform using " "advanced interface\n"; int rank = 2; int n[2] = {4, 5}; int howmany = 3; // batch size int n1_complex_elements = n[1] / 2 + 1; int n1_padding_real_elements = n1_complex_elements * 2; int istride = 1; int ostride = istride; int inembed[2] = {istride * n[0], istride * n1_padding_real_elements}; int onembed[2] = {ostride * n[0], ostride * n1_complex_elements}; int idist = inembed[0] * inembed[1]; int odist = onembed[0] * onembed[1]; std::cout << "n: " << n[0] << " " << n[1] << "\n" << "howmany: " << howmany << "\n" << "istride: " << istride << "\tostride: " << ostride << "\n" << "inembed: " << inembed[0] << " " << inembed[1] << "\n" << "onembed: " << onembed[0] << " " << onembed[1] << "\n" << "idist: " << idist << "\todist: " << odist << "\n" << std::endl; std::vector data(howmany * idist); const auto total_bytes = data.size() * sizeof(decltype(data)::value_type); std::cout << "input:\n"; std::fill(data.begin(), data.end(), 0.0); for(int ibatch = 0; ibatch < howmany; ++ibatch) { for(int i = 0; i < n[0]; i++) { for(int j = 0; j < n[1]; j++) { const auto pos = ibatch * idist + istride * (i * inembed[1] + j); data[pos] = i + ibatch + j; } } } for(int ibatch = 0; ibatch < howmany; ++ibatch) { std::cout << "batch: " << ibatch << "\n"; for(int i = 0; i < inembed[0]; i++) { for(int j = 0; j < inembed[1]; j++) { const auto pos = ibatch * idist + i * inembed[1] + j; std::cout << data[pos] << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << std::endl; hipfftHandle hipForwardPlan; hipfftResult hipfft_rt; hipfft_rt = hipfftPlanMany(&hipForwardPlan, rank, n, inembed, istride, idist, onembed, ostride, odist, HIPFFT_R2C, // Use HIPFFT_D2Z for double-precsion. 
howmany); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to create plan"); hipfftReal* gpu_data; hipError_t hip_rt; hip_rt = hipMalloc((void**)&gpu_data, total_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); hip_rt = hipMemcpy(gpu_data, (void*)data.data(), total_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); hipfft_rt = hipfftExecR2C(hipForwardPlan, gpu_data, (hipfftComplex*)gpu_data); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to execute plan"); hip_rt = hipMemcpy((void*)data.data(), gpu_data, total_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); std::cout << "output:\n"; const std::complex* output = (std::complex*)data.data(); for(int ibatch = 0; ibatch < howmany; ++ibatch) { std::cout << "batch: " << ibatch << "\n"; for(int i = 0; i < onembed[0]; i++) { for(int j = 0; j < onembed[1]; j++) { const auto pos = ibatch * odist + i * onembed[1] + j; std::cout << output[pos] << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << std::endl; hipfftDestroy(hipForwardPlan); hip_rt = hipFree(gpu_data); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/samples/hipfft_planmany_2d_z2z.cpp000066400000000000000000000117271445203054200237730ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP int main() { std::cout << "hipfft 2D double-precision complex-to-complex transform using " "advanced interface\n"; int rank = 2; int n[2] = {4, 5}; int howmany = 3; // array is contiguous in memory int istride = 1; // in-place transforms require istride=ostride int ostride = istride; // we choose to have no padding around our data: int inembed[2] = {istride * n[0], istride * n[1]}; // in-place transforms require inembed=oneembed: int onembed[2] = {inembed[0], inembed[1]}; int idist = inembed[0] * inembed[1]; int odist = onembed[0] * onembed[1]; std::cout << "n: " << n[0] << " " << n[1] << "\n" << "howmany: " << howmany << "\n" << "istride: " << istride << "\tostride: " << ostride << "\n" << "inembed: " << inembed[0] << " " << inembed[1] << "\n" << "onembed: " << onembed[0] << " " << onembed[1] << "\n" << "idist: " << idist << "\todist: " << odist << "\n" << std::endl; std::vector> data(howmany * idist); const auto total_bytes = data.size() * sizeof(decltype(data)::value_type); std::cout << "input:\n"; std::fill(data.begin(), data.end(), 0.0); for(int ibatch = 0; ibatch < howmany; ++ibatch) { for(int i = 0; i < n[0]; i++) { for(int j = 0; j < n[1]; j++) { const auto pos = ibatch * idist + istride * (i * inembed[1] + j); data[pos] = std::complex(i + ibatch, j); } } } for(int ibatch = 0; ibatch < howmany; ++ibatch) { std::cout << "batch: " << ibatch << "\n"; for(int i = 0; i < inembed[0]; i++) { for(int j = 0; j < inembed[1]; j++) { const auto pos = ibatch * idist + i * inembed[1] + j; std::cout << data[pos] << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << std::endl; hipfftHandle hipPlan; hipfftResult hipfft_rt; hipfft_rt = hipfftPlanMany( &hipPlan, rank, n, inembed, istride, idist, onembed, ostride, odist, HIPFFT_Z2Z, howmany); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to 
create plan"); hipError_t hip_rt; hipfftDoubleComplex* d_in_out; hip_rt = hipMalloc((void**)&d_in_out, total_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); hip_rt = hipMemcpy(d_in_out, (void*)data.data(), total_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); hipfft_rt = hipfftExecZ2Z(hipPlan, d_in_out, d_in_out, HIPFFT_FORWARD); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("failed to execute plan"); hip_rt = hipMemcpy((void*)data.data(), d_in_out, total_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); std::cout << "output:\n"; for(int ibatch = 0; ibatch < howmany; ++ibatch) { std::cout << "batch: " << ibatch << "\n"; for(int i = 0; i < onembed[0]; i++) { for(int j = 0; j < onembed[1]; j++) { const auto pos = ibatch * odist + i * onembed[1] + j; std::cout << data[pos] << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << std::endl; hip_rt = hipFree(d_in_out); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/samples/hipfft_setworkarea.cpp000066400000000000000000000113661445203054200233100ustar00rootroot00000000000000// Copyright (C) 2019 - 2022 Advanced Micro Devices, Inc. All rights // reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include #include #include #include DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP #include "../hipfft_params.h" int main() { std::cout << "hipfft 1D single-precision real-to-complex transform showing " "work memory usage\n"; int major_version; hipfftGetProperty(HIPFFT_MAJOR_VERSION, &major_version); std::cout << "hipFFT major_version " << major_version << std::endl; const size_t N = 9; const size_t Ncomplex = (N / 2 + 1); std::vector rdata(N); std::vector> cdata(Ncomplex); size_t real_bytes = sizeof(decltype(rdata)::value_type) * rdata.size(); size_t complex_bytes = sizeof(decltype(cdata)::value_type) * cdata.size(); hipError_t hip_rt = hipSuccess; hipfftResult hipfft_rt = HIPFFT_SUCCESS; std::cout << "input:\n"; for(size_t i = 0; i < N; i++) { rdata[i] = i; } for(size_t i = 0; i < N; i++) { std::cout << rdata[i] << " "; } std::cout << std::endl; // Create HIP device object. 
hipfftReal* x; hip_rt = hipMalloc(&x, real_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); hipfftComplex* y; hip_rt = hipMalloc(&y, complex_bytes); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); // Copy input data to device hip_rt = hipMemcpy(x, rdata.data(), real_bytes, hipMemcpyHostToDevice); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); size_t workSize; hipfft_rt = hipfftEstimate1d(N, HIPFFT_R2C, 1, &workSize); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftEstimate1d failed"); std::cout << "hipfftEstimate 1d workSize: " << workSize << std::endl; hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; hipfft_rt = hipfftCreate(&plan); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); hipfft_rt = hipfftSetAutoAllocation(plan, 0); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftSetAutoAllocation failed"); hipfft_rt = hipfftMakePlan1d(plan, N, HIPFFT_R2C, 1, &workSize); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftMakePlan1d failed"); // Set work buffer hipfftComplex* workBuf; hip_rt = hipMalloc(&workBuf, workSize); if(hip_rt != hipSuccess) throw std::runtime_error("hipMalloc failed"); hipfft_rt = hipfftSetWorkArea(plan, workBuf); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftSetWorkArea failed"); hipfft_rt = hipfftGetSize(plan, &workSize); if(hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftGetSize failed"); std::cout << "hipfftGetSize workSize: " << workSize << std::endl; // Execute plan hipfft_rt = hipfftExecR2C(plan, x, (hipfftComplex*)y); // Copy result back to host hip_rt = hipMemcpy(cdata.data(), y, complex_bytes, hipMemcpyDeviceToHost); if(hip_rt != hipSuccess) throw std::runtime_error("hipMemcpy failed"); std::cout << "output:\n"; for(size_t i = 0; i < Ncomplex; i++) { std::cout << cdata[i] << " "; } std::cout << std::endl; hipfftDestroy(plan); hip_rt = hipFree(x); 
if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); hip_rt = hipFree(workBuf); if(hip_rt != hipSuccess) throw std::runtime_error("hipFree failed"); return 0; } hipFFT-rocm-5.7.1/clients/tests/000077500000000000000000000000001445203054200164045ustar00rootroot00000000000000hipFFT-rocm-5.7.1/clients/tests/CMakeLists.txt000066400000000000000000000144311445203054200211470ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2016 - 2023 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
# ############################################################################# find_package( Boost COMPONENTS program_options REQUIRED) set( Boost_USE_STATIC_LIBS OFF ) find_package( FFTW 3.0 REQUIRED MODULE COMPONENTS FLOAT DOUBLE ) set( THREADS_PREFER_PTHREAD_FLAG ON ) find_package( Threads REQUIRED ) set( hipfft-test_source gtest_main.cpp hipfft_accuracy_test.cpp simple_test.cpp accuracy_test_1D.cpp accuracy_test_2D.cpp accuracy_test_3D.cpp accuracy_test_callback.cpp ../rocFFT/shared/array_validator.cpp ) set( hipfft-test_includes ../rocFFT/clients/tests/fftw_transform.h ../rocFFT/clients/tests/rocfft_against_fftw.h ../rocFFT/clients/tests/misc/include/test_exception.h ../rocFFT/shared/array_validator.h ) add_executable( hipfft-test ${hipfft-test_source} ${hipfft-test_includes} ) target_include_directories( hipfft-test PRIVATE $ $ $ $ $ $ ) if( GTEST_FOUND ) target_include_directories( hipfft-test PRIVATE $ ) target_link_libraries( hipfft-test PRIVATE ${GTEST_LIBRARIES} ) else() add_dependencies( hipfft-test gtest ) target_include_directories( hipfft-test PRIVATE hipfft-test_include_dirs ${GTEST_INCLUDE_DIRS} ) target_link_libraries( hipfft-test PRIVATE ${GTEST_LIBRARIES} ) endif() target_compile_options( hipfft-test PRIVATE ${WARNING_FLAGS} ) if( NOT CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) if( NOT BUILD_WITH_LIB STREQUAL "CUDA" ) if( WIN32 ) find_package( HIP CONFIG REQUIRED ) else() find_package( HIP MODULE REQUIRED ) endif() target_link_libraries( hipfft-test PRIVATE hip::host hip::device ) else() target_compile_definitions( hipfft-test PRIVATE __HIP_PLATFORM_NVIDIA__) target_include_directories( hipfft-test PRIVATE ${HIP_INCLUDE_DIRS}) endif() else() if( BUILD_WITH_LIB STREQUAL "CUDA" AND DEFINED boost_program_options_VERSION ) # NVCC doesn't like linking with files that don't end in .so, so # we add a hack to remove the version number as the suffix. 
string(REGEX REPLACE \.${boost_program_options_VERSION} "" Boost_PROGRAM_OPTIONS_LIBRARY_RELEASE ${Boost_PROGRAM_OPTIONS_LIBRARY_RELEASE}) endif() foreach( target ${AMDGPU_TARGETS} ) target_compile_options( hipfft-test PRIVATE --offload-arch=${target} ) endforeach( ) endif() if ( BUILD_WITH_LIB STREQUAL "CUDA" ) target_compile_options( hipfft-test PRIVATE -arch sm_53 -gencode=arch=compute_53,code=sm_53 -Xptxas=-w) target_link_libraries( hipfft-test PRIVATE ${CUDA_LIBRARIES} ) target_compile_definitions( hipfft-test PUBLIC _CUFFT_BACKEND ) else() if( NOT hiprand_FOUND ) find_package( hiprand REQUIRED ) endif() target_link_libraries( hipfft-test PRIVATE hip::hiprand ) endif() option( BUILD_CLIENTS_TESTS_OPENMP "Build tests with OpenMP" ON ) if( BUILD_CLIENTS_TESTS_OPENMP ) if( BUILD_WITH_LIB STREQUAL "CUDA" ) message( STATUS "OpenMP is not supported on CUDA, building tests without it" ) else() target_compile_options( hipfft-test PRIVATE -DBUILD_CLIENTS_TESTS_OPENMP ) if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) target_compile_options( hipfft-test PRIVATE -fopenmp ) target_link_libraries( hipfft-test PRIVATE -fopenmp -L${HIP_CLANG_ROOT}/lib -Wl,-rpath=${HIP_CLANG_ROOT}/lib ) target_include_directories( hipfft-test PRIVATE ${HIP_CLANG_ROOT}/include ) else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") target_compile_options( hipfft-test PRIVATE -fopenmp=libomp ) target_link_options( hipfft-test PRIVATE -fopenmp=libomp ) endif() endif() endif() endif() if(FFTW_MULTITHREAD) target_compile_options( hipfft-test PRIVATE -DFFTW_MULTITHREAD ) endif( ) set_target_properties( hipfft-test PROPERTIES DEBUG_POSTFIX "-d" CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON ) target_link_libraries( hipfft-test PRIVATE ${FFTW_LIBRARIES} ${Boost_PROGRAM_OPTIONS_LIBRARY_RELEASE} Threads::Threads ) target_link_libraries( hipfft-test PRIVATE hip::hipfft ) set_target_properties( hipfft-test PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) rocm_install(TARGETS hipfft-test COMPONENT 
tests) if (WIN32) # Ensure tests run with HIP DLLs and not anything the driver owns # in system32. Libraries like amdhip64.dll are also in the HIP # runtime, and we need run with those. But the only way to make a # same-named DLL override something in system32 is to have it next # to the executable. So copy them in. file( GLOB third_party_dlls LIST_DIRECTORIES OFF CONFIGURE_DEPENDS ${HIP_DIR}/bin/*.dll C:/Windows/System32/libomp140*.dll ) foreach( file_i ${third_party_dlls}) add_custom_command( TARGET hipfft-test POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${PROJECT_BINARY_DIR}/staging ) endforeach( file_i ) endif() hipFFT-rocm-5.7.1/clients/tests/accuracy_test_1D.cpp000066400000000000000000000657651445203054200223100ustar00rootroot00000000000000// Copyright (C) 2016 - 2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include #include "../rocFFT/shared/fft_params.h" #include "accuracy_test.h" #include "fftw_transform.h" #include "rocfft_against_fftw.h" using ::testing::ValuesIn; // TODO: handle special case where length=2 for real/complex transforms. const static std::vector pow2_range = {2, 4, 8, 16, 32, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, 67108864, 134217728, 268435456, 536870912}; // 2^30 is 1073741824; const static std::vector pow2_range_half = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536}; const static std::vector pow3_range = {3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441, 1594323, 4782969, 14348907, 43046721, 129140163, 387420489}; const static std::vector pow5_range = {5, 25, 125, 625, 3125, 15625, 78125, 390625, 1953125, 9765625, 48828125, 244140625}; // radix 7, 11, 13 sizes that are either pure powers or sizes people have wanted in the wild const static std::vector radX_range = {7, 49, 84, 112, 11, 13, 52, 104, 208, 343, 2401, 16807}; const static std::vector mix_range = {6, 10, 12, 15, 20, 30, 56, 120, 150, 225, 240, 300, 336, 486, 600, 900, 1250, 1500, 1875, 2160, 2187, 2250, 2500, 3000, 4000, 12000, 24000, 72000}; const static std::vector prime_range = {17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97}; static std::vector small_1D_sizes() { static const size_t SMALL_1D_MAX = 8192; // generate a list of sizes from 2 and up, skipping any sizes that are already covered std::vector covered_sizes; std::copy(pow2_range.begin(), pow2_range.end(), std::back_inserter(covered_sizes)); std::copy(pow3_range.begin(), pow3_range.end(), std::back_inserter(covered_sizes)); std::copy(pow5_range.begin(), pow5_range.end(), std::back_inserter(covered_sizes)); std::copy(radX_range.begin(), radX_range.end(), std::back_inserter(covered_sizes)); std::copy(mix_range.begin(), 
mix_range.end(), std::back_inserter(covered_sizes)); std::copy(prime_range.begin(), prime_range.end(), std::back_inserter(covered_sizes)); std::sort(covered_sizes.begin(), covered_sizes.end()); std::vector output; for(size_t i = 2; i < SMALL_1D_MAX; ++i) { if(!std::binary_search(covered_sizes.begin(), covered_sizes.end(), i)) { output.push_back(i); } } return output; } const static std::vector> stride_range = {{1}}; const static std::vector batch_range_1D = {4, 2, 1}; const static std::vector> stride_range_for_prime = {{1}, {2}, {3}, {64}, {65}}; //TODO: this will be merged back to stride_range const static std::vector> ioffset_range_zero = {{0, 0}}; const static std::vector> ooffset_range_zero = {{0, 0}}; const static std::vector> ioffset_range = {{0, 0}, {1, 1}}; const static std::vector> ooffset_range = {{0, 0}, {1, 1}}; INSTANTIATE_TEST_SUITE_P(pow2_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow2_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(pow2_1D_half, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range_half}), {fft_precision_half}, batch_range_1D, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow2_1D_half, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range_half}), {fft_precision_half}, batch_range_1D, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); 
INSTANTIATE_TEST_SUITE_P(pow3_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow3_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow3_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow3_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(pow5_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow5_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow5_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow5_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(radX_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({radX_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_radX_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({radX_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(prime_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({prime_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); 
INSTANTIATE_TEST_SUITE_P(DISABLED_offset_prime_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({prime_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(mix_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({mix_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_mix_1D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({mix_range}), precision_range_sp_dp, batch_range_1D, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); // small 1D sizes just need to make sure our factorization isn't // completely broken, so we just check simple C2C outplace interleaved INSTANTIATE_TEST_SUITE_P(small_1D, accuracy_test, ::testing::ValuesIn(param_generator_base( {fft_transform_type_complex_forward}, generate_lengths({small_1D_sizes()}), {fft_precision_single}, {1}, [](fft_transform_type t, const std::vector& place_range, const bool planar) { return std::vector{ std::make_tuple(t, place_range[0], fft_array_type_complex_interleaved, fft_array_type_complex_interleaved)}; }, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, {fft_placement_notinplace})), accuracy_test::TestName); // NB: // We have known non-unit strides issues for 1D: // - C2C middle size(for instance, single precision, 8192) // - C2C large size(for instance, single precision, 524288) // We need to fix non-unit strides first, and then address non-unit strides + batch tests. // Then check these problems of R2C and C2R. After that, we could open arbitrary permutations in the // main tests. 
// // The below test covers non-unit strides, pow of 2, middle sizes, which has SBCC/SBRC kernels // invloved. const static std::vector pow2_range_for_stride = {4096, 8192, 524288}; const static std::vector pow2_range_for_stride_half = {4096, 8192}; const static std::vector> stride_range_for_pow2 = {{2}, {3}}; const static std::vector batch_range_for_stride = {2, 1}; INSTANTIATE_TEST_SUITE_P( pow2_1D_stride_complex, accuracy_test, ::testing::ValuesIn(param_generator_complex(generate_lengths({pow2_range_for_stride}), precision_range_sp_dp, batch_range_1D, stride_range_for_pow2, stride_range_for_pow2, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( pow2_1D_stride_real, accuracy_test, ::testing::ValuesIn(param_generator_real(generate_lengths({pow2_range_for_stride}), precision_range_sp_dp, batch_range_1D, stride_range_for_pow2, stride_range_for_pow2, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( pow2_1D_stride_real_half, accuracy_test, ::testing::ValuesIn(param_generator_real(generate_lengths({pow2_range_for_stride_half}), {fft_precision_half}, batch_range_1D, stride_range_for_pow2, stride_range_for_pow2, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); // Create an array parameters for strided 2D batched transforms. 
inline auto param_generator_complex_1d_batched_2d(const std::vector>& v_lengths, const std::vector& precision_range, const std::vector>& ioffset_range, const std::vector>& ooffset_range, const std::vector& place_range) { std::vector params; // for(auto& transform_type : // {fft_transform_type_complex_forward, fft_transform_type_complex_inverse}) // { for(auto& transform_type : trans_type_range_complex) { for(const auto& lengths : v_lengths) { // try to ensure that we are given literal lengths, not // something to be passed to generate_lengths if(lengths.empty() || lengths.size() > 3) { assert(false); continue; } for(const auto precision : precision_range) { for(const auto& types : generate_types(transform_type, place_range, false)) { for(const auto& ioffset : ioffset_range) { for(const auto& ooffset : ooffset_range) { fft_params param; param.length = lengths; param.istride = lengths; param.ostride = lengths; param.nbatch = lengths[0]; param.precision = precision; param.transform_type = std::get<0>(types); param.placement = std::get<1>(types); param.idist = 1; param.odist = 1; param.itype = std::get<2>(types); param.otype = std::get<3>(types); param.ioffset = ioffset; param.ooffset = ooffset; params.push_back(param); } } } } } } return params; } const static std::vector pow2_range_2D = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192}; INSTANTIATE_TEST_SUITE_P( pow2_1D_complex_batched_2D_strided, accuracy_test, ::testing::ValuesIn(param_generator_complex_1d_batched_2d(generate_lengths({pow2_range_2D}), precision_range_sp_dp, ioffset_range_zero, ooffset_range_zero, place_range)), accuracy_test::TestName); const static std::vector pow3_range_2D = {3, 27, 81, 243, 729, 2187, 6561}; INSTANTIATE_TEST_SUITE_P( pow3_1D_complex_batched_2D_strided, accuracy_test, ::testing::ValuesIn(param_generator_complex_1d_batched_2d(generate_lengths({pow3_range_2D}), precision_range_sp_dp, ioffset_range_zero, ooffset_range_zero, place_range)), accuracy_test::TestName); const 
static std::vector pow5_range_2D = {5, 25, 125, 625, 3125, 15625}; INSTANTIATE_TEST_SUITE_P( pow5_1D_complex_batched_2D_strided, accuracy_test, ::testing::ValuesIn(param_generator_complex_1d_batched_2d(generate_lengths({pow5_range_2D}), precision_range_sp_dp, ioffset_range_zero, ooffset_range_zero, place_range)), accuracy_test::TestName); const static std::vector prime_range_2D = {7, 11, 13, 17, 19, 23, 29, 263, 269, 271, 277}; INSTANTIATE_TEST_SUITE_P( prime_1D_complex_batched_2D_strided, accuracy_test, ::testing::ValuesIn(param_generator_complex_1d_batched_2d(generate_lengths({prime_range_2D}), precision_range_sp_dp, ioffset_range_zero, ooffset_range_zero, place_range)), accuracy_test::TestName); hipFFT-rocm-5.7.1/clients/tests/accuracy_test_2D.cpp000066400000000000000000000370511445203054200222740ustar00rootroot00000000000000// Copyright (C) 2016 - 2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include #include #include #include #include "../rocFFT/shared/fft_params.h" #include "accuracy_test.h" #include "fftw_transform.h" #include "rocfft_against_fftw.h" using ::testing::ValuesIn; // Set parameters // TODO: enable 16384, 32768 when omp support is available (takes too // long!) const static std::vector pow2_range = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192}; // For the current configuration, half-precision has a fft size limit of 65536 const static std::vector pow2_range_half = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024}; const static std::vector pow3_range = {3, 27, 81, 243, 729, 2187, 6561}; const static std::vector pow5_range = {5, 25, 125, 625, 3125, 15625}; const static std::vector prime_range = {7, 11, 13, 17, 19, 23, 29, 263, 269, 271, 277}; const static std::vector mix_range = {56, 120, 336, 2160, 5000, 6000, 8000}; const static std::vector> stride_range = {{1}}; static std::vector> ioffset_range_zero = {{0, 0}}; static std::vector> ooffset_range_zero = {{0, 0}}; static std::vector> ioffset_range = {{0, 0}, {1, 1}}; static std::vector> ooffset_range = {{0, 0}, {1, 1}}; INSTANTIATE_TEST_SUITE_P(pow2_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range, pow2_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow2_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range, pow2_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(pow2_2D_half, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range_half, {2, 4, 8, 16, 32}}), {fft_precision_half}, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), 
accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow2_2D_half, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range_half, {2, 4, 8, 16, 32}}), {fft_precision_half}, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(pow3_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow3_range, pow3_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow3_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow3_range, pow3_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(pow5_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow5_range, pow5_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow5_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow5_range, pow5_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(prime_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({prime_range, prime_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_prime_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({prime_range, prime_range}), precision_range_sp_dp, batch_range, stride_range, 
stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(mix_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({mix_range, mix_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_mix_2D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({mix_range, mix_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); // test length-1 on one dimension against a variety of non-1 lengths INSTANTIATE_TEST_SUITE_P(len1_2D, accuracy_test, ::testing::ValuesIn(param_generator( generate_lengths({{1}, {4, 8, 8192, 3, 27, 7, 11, 5000, 8000}}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); // length-1 on the other dimension INSTANTIATE_TEST_SUITE_P(len1_swap_2D, accuracy_test, ::testing::ValuesIn(param_generator( generate_lengths({{4, 8, 8192, 3, 27, 7, 11, 5000, 8000}, {1}}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); hipFFT-rocm-5.7.1/clients/tests/accuracy_test_3D.cpp000066400000000000000000000277701445203054200223040ustar00rootroot00000000000000// Copyright (C) 2016 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include #include #include #include #include "../rocFFT/shared/fft_params.h" #include "accuracy_test.h" #include "fftw_transform.h" #include "rocfft_against_fftw.h" using ::testing::ValuesIn; // Set parameters // TODO: 512, 1024, 2048 make the tests take too long; re-enable when // test speed is improved. 
static std::vector pow2_range = {4, 8, 16, 32, 128, 256}; static std::vector pow2_range_half = {4, 8, 16, 32}; // SBCC+SBRC as a sub-node of a 3D TRTRTR std::vector> pow2_adhoc = {{4, 4, 8192}}; static std::vector pow3_range = {3, 9, 27, 81, 243}; static std::vector pow5_range = {5, 25, 125}; static std::vector prime_range = {7, 11, 13, 17, 19, 23, 29}; static std::vector> stride_range = {{1}}; static std::vector> ioffset_range_zero = {{0, 0}}; static std::vector> ooffset_range_zero = {{0, 0}}; static std::vector> ioffset_range = {{0, 0}, {1, 1}}; static std::vector> ooffset_range = {{0, 0}, {1, 1}}; INSTANTIATE_TEST_SUITE_P( pow2_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range, pow2_range, pow2_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( DISABLED_offset_pow2_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range, pow2_range, pow2_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(pow2_3D_half, accuracy_test, ::testing::ValuesIn(param_generator( generate_lengths({pow2_range_half, pow2_range_half, pow2_range_half}), {fft_precision_half}, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P(DISABLED_offset_pow2_3D_half, accuracy_test, ::testing::ValuesIn(param_generator( generate_lengths({pow2_range_half, pow2_range_half, pow2_range_half}), {fft_precision_half}, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( pow3_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow3_range, pow3_range, 
pow3_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( DISABLED_offset_pow3_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow3_range, pow3_range, pow3_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( pow5_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow5_range, pow5_range, pow5_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( DISABLED_offset_pow5_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow5_range, pow5_range, pow5_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( prime_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({prime_range, prime_range, prime_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( DISABLED_offset_prime_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({prime_range, prime_range, prime_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( mix_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range, pow3_range, prime_range}), precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), 
accuracy_test::TestName); INSTANTIATE_TEST_SUITE_P( DISABLED_offset_mix_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({pow2_range, pow3_range, prime_range}), precision_range_full, batch_range, stride_range, stride_range, ioffset_range, ooffset_range, place_range, false, false)), accuracy_test::TestName); // Test combinations of SBRC sizes, plus a non-SBRC size (10) to // exercise fused SBRC+transpose kernels. static std::vector sbrc_range = {50, 64, 81, 100, 200, 10, 128, 256}; static std::vector sbrc_batch_range = {2, 1}; INSTANTIATE_TEST_SUITE_P( sbrc_3D, accuracy_test, ::testing::ValuesIn(param_generator(generate_lengths({sbrc_range, sbrc_range, sbrc_range}), precision_range_sp_dp, sbrc_batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); // pick small sizes that will exercise 2D_SINGLE and a couple of sizes that won't static std::vector inner_batch_3D_range = {4, 8, 16, 32, 20, 24, 64}; static std::vector inner_batch_3D_batch_range = {3, 2, 1}; INSTANTIATE_TEST_SUITE_P( inner_batch_3D, accuracy_test, // TODO: enable for real as well, but currently real kernels have // trouble with weird strides ::testing::ValuesIn(param_generator_complex( generate_lengths({inner_batch_3D_range, inner_batch_3D_range, inner_batch_3D_range}), precision_range_sp_dp, inner_batch_3D_batch_range, stride_generator_3D_inner_batch(stride_range), stride_generator_3D_inner_batch(stride_range), ioffset_range_zero, ooffset_range_zero, place_range, false, false)), accuracy_test::TestName); hipFFT-rocm-5.7.1/clients/tests/accuracy_test_callback.cpp000066400000000000000000000136061445203054200235630ustar00rootroot00000000000000// Copyright (C) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include "accuracy_test.h"

// NOTE(review): template argument lists (the element types of the
// std::vector declarations below) are missing from this copy of the file.

// Problem lengths shared by the callback suites in this file. Each entry is
// one length tuple; the inline comments name the plan shape each group is
// meant to reach.
std::vector> callback_sizes = {
    // some single kernel sizes
    {4},
    {16},
    {81},
    {100},
    // L1D_TRTRT sizes
    {70},
    {77},
    {1344},
    // L1D_CC sizes
    {8192},
    {10000},
    // prime
    {23},
    {29},
    // 2D_SINGLE sizes, small and big
    {16, 8},
    {32, 32},
    {9, 81},
    {27, 81},
    {81, 27},
    {256, 9},
    {9, 256},
    {125, 32},
    {32, 125},
    // 2D_RTRT
    {20, 40},
    {81, 81},
    // 2D_RC
    {128, 64},
    {128, 256},
    // more complicated children of 2D_RTRT (L1D_TRTRT, L1D_CC, prime)
    {4, 63},
    {63, 4},
    {4, 8192},
    {8192, 4},
    {4, 23},
    {23, 4},
    // 3D_TRTRTR, with complicated children
    {63, 5, 6},
    {6, 5, 63},
    {23, 5, 6},
    {6, 5, 23},
    {70, 5, 6},
    {6, 5, 70},
    {8192, 5, 6},
    {6, 5, 8192},
    // 3D_RTRT, with complicated children
    {23, 4, 4},
    {4, 4, 23},
    {70, 4, 4},
    {4, 4, 70},
    {8192, 4, 4},
    {4, 4, 8192},
    // 3D odd lengths
    {27, 27, 27},
    // 3D_BLOCK_RC
    {64, 64, 64},
};

const static std::vector> stride_range = {{1}};

// Zero offsets are used by the enabled suite; the non-zero ranges are only
// used by the DISABLED_ suite below.
const static std::vector> ioffset_range_zero = {{0, 0}};
const static std::vector> ooffset_range_zero = {{0, 0}};

const static std::vector> ioffset_range = {{0, 0}, {1, 1}};
const static std::vector> ooffset_range = {{0, 0}, {1, 1}};

// Only forward transforms (complex and real) are instantiated for callbacks.
auto transform_types = {fft_transform_type_complex_forward, fft_transform_type_real_forward};

// The callback suites are only instantiated when __HIP__ is defined.
#ifdef __HIP__
INSTANTIATE_TEST_SUITE_P(callback,
                         accuracy_test,
                         ::testing::ValuesIn(param_generator_base(transform_types,
                                                                  callback_sizes,
                                                                  precision_range_sp_dp,
                                                                  batch_range,
                                                                  generate_types,
                                                                  stride_range,
                                                                  stride_range,
                                                                  ioffset_range_zero,
                                                                  ooffset_range_zero,
                                                                  place_range,
                                                                  false,
                                                                  true)),
                         accuracy_test::TestName);

INSTANTIATE_TEST_SUITE_P(DISABLED_callback,
                         accuracy_test,
                         ::testing::ValuesIn(param_generator_base(transform_types,
                                                                  callback_sizes,
                                                                  precision_range_sp_dp,
                                                                  batch_range,
                                                                  generate_types,
                                                                  stride_range,
                                                                  stride_range,
                                                                  ioffset_range,
                                                                  ooffset_range,
                                                                  place_range,
                                                                  false,
                                                                  true)),
                         accuracy_test::TestName);
#endif

// one of the obvious use cases for callbacks is to implement result
// scaling manually, so use the same sizes to test rocFFT's own
// result scaling feature.
inline auto param_generator_scaling(const std::vector>& v_lengths) { auto params = param_generator(callback_sizes, precision_range_sp_dp, batch_range, stride_range, stride_range, ioffset_range_zero, ooffset_range_zero, place_range, false); for(auto& param : params) param.scale_factor = 7.23; return params; } // cuFFT does not support result scaling #ifndef _CUFFT_BACKEND INSTANTIATE_TEST_SUITE_P(scaling, accuracy_test, ::testing::ValuesIn(param_generator_scaling(callback_sizes)), accuracy_test::TestName); #endif hipFFT-rocm-5.7.1/clients/tests/gtest_main.cpp000066400000000000000000000430111445203054200212410ustar00rootroot00000000000000// Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
/// @file /// @brief googletest based unit tester for rocfft /// #include #include #include #include #include #include #include #include "../hipfft_params.h" #include "../rocFFT/shared/concurrency.h" #include "../rocFFT/shared/environment.h" #include "../rocFFT/shared/work_queue.h" #include "hipfft.h" #include "hipfft_accuracy_test.h" #include "hipfft_test_params.h" #ifdef WIN32 #include #else #include #endif #include namespace po = boost::program_options; // Control output verbosity: int verbose; // User-defined random seed size_t random_seed; // Probability of running individual planar FFTs double planar_prob; // Probability of running individual callback FFTs double callback_prob; // Transform parameters for manual test: fft_params manual_params; // Host memory limitation for tests (GiB): size_t ramgb; // Device memory limitation for tests (GiB): size_t vramgb; // Allow skipping tests if there is a runtime error bool skip_runtime_fails; // But count the number of failures int n_hip_failures = 0; // Manually specified precision cutoffs: double single_epsilon; double double_epsilon; double half_epsilon; // Measured precision cutoffs: double max_linf_eps_double = 0.0; double max_l2_eps_double = 0.0; double max_linf_eps_single = 0.0; double max_l2_eps_single = 0.0; double max_linf_eps_half = 0.0; double max_l2_eps_half = 0.0; // Control whether we use FFTW's wisdom (which we use to imply FFTW_MEASURE). 
bool use_fftw_wisdom = false; // Cache the last cpu fft that was requested last_cpu_fft_cache last_cpu_fft_data; system_memory get_system_memory() { system_memory memory_data; #ifdef WIN32 MEMORYSTATUSEX info; info.dwLength = sizeof(info); if(!GlobalMemoryStatusEx(&info)) return memory_data; memory_data.total_bytes = info.ullTotalPhys; memory_data.free_bytes = info.ullAvailPhys; #else struct sysinfo info; if(sysinfo(&info) != 0) return memory_data; memory_data.total_bytes = info.totalram * info.mem_unit; memory_data.free_bytes = info.freeram * info.mem_unit; #endif return memory_data; } system_memory start_memory = get_system_memory(); void precompile_test_kernels(const std::string& precompile_file) { std::cout << "precompiling test kernels...\n"; WorkQueue tokenQueue; std::vector tokens; auto ut = testing::UnitTest::GetInstance(); for(int ts_index = 0; ts_index < ut->total_test_suite_count(); ++ts_index) { const auto ts = ut->GetTestSuite(ts_index); // skip disabled suites if(strncmp(ts->name(), "DISABLED", 8) == 0) continue; for(int ti_index = 0; ti_index < ts->total_test_count(); ++ti_index) { const auto ti = ts->GetTestInfo(ti_index); std::string name = ti->name(); // only care about accuracy tests if(name.find("vs_fftw/") != std::string::npos) { name.erase(0, 8); // change batch to 1, so we don't waste time creating // multiple plans that differ only by batch auto idx = name.find("_batch_"); if(idx == std::string::npos) continue; // advance idx to batch number idx += 7; auto end = name.find('_', idx); if(end == std::string::npos) continue; name.replace(idx, end - idx, "1"); tokens.emplace_back(std::move(name)); } } } std::random_device dev; std::mt19937 dist(dev()); std::shuffle(tokens.begin(), tokens.end(), dist); auto precompile_begin = std::chrono::steady_clock::now(); std::cout << "precompiling " << tokens.size() << " FFT plans...\n"; for(auto&& t : tokens) tokenQueue.push(std::move(t)); EnvironmentSetTemp env_compile_only{"ROCFFT_INTERNAL_COMPILE_ONLY", 
"1"}; const size_t NUM_THREADS = rocfft_concurrency(); std::vector threads; for(size_t i = 0; i < NUM_THREADS; ++i) { threads.emplace_back([&tokenQueue]() { for(;;) { std::string token{tokenQueue.pop()}; if(token.empty()) break; try { hipfft_params params; params.from_token(token); params.validate(); params.create_plan(); } catch(std::exception& e) { // failed to create a plan, abort // // we could continue on, but the test should just // fail later anyway in the same way. so report // which token failed early and get out throw std::runtime_error(token + " plan creation failure: " + e.what()); } } }); // insert empty tokens to tell threads to stop tokenQueue.push({}); } for(auto& t : threads) t.join(); auto precompile_end = std::chrono::steady_clock::now(); std::chrono::duration precompile_ms = precompile_end - precompile_begin; std::cout << "done precompiling FFT plans in " << static_cast(precompile_ms.count()) << " ms\n"; } int main(int argc, char* argv[]) { // Parse arguments before initiating gtest. 
po::options_description opdesc( "\n" "hipFFT Runtime Test command line options\n" "NB: input parameters are row-major.\n" "\n" "FFTW accuracy test cases are named using these identifiers:\n" "\n" " len_: problem dimensions, row-major\n" " single,double: precision\n" " ip,op: in-place or out-of-place\n" " batch_: batch size\n" " istride__: input stride (ostride for output stride), format may be:\n" " CI - complex interleaved\n" " CP - complex planar\n" " R - real\n" " HI - hermitian interleaved\n" " HP - hermitian planar\n" "\n" "Usage"); // clang-format does not handle boost program options very well: // clang-format off opdesc.add_options() ("verbose,v", po::value()->default_value(0), "print out detailed information for the tests.") ("seed", po::value(&random_seed), "Random seed; if unset, use an actual random seed.") ("callback_prob", po::value(&callback_prob)->default_value(0.1), "Probability of running individual callback transforms"); // clang-format on po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(opdesc).allow_unregistered().run(), vm); po::notify(vm); verbose = vm["verbose"].as(); // NB: If we initialize gtest first, then it removes all of its own command-line // arguments and sets argc and argv correctly; no need to jump through hoops for // boost::program_options. ::testing::InitGoogleTest(&argc, argv); // Filename for fftw and fftwf wisdom. std::string fftw_wisdom_filename; // Token string to fully specify fft params for the manual test. std::string test_token; // Filename for precompiled kernels to be written to std::string precompile_file; // Declare the supported options. 
// clang-format does not handle boost program options very well: // clang-format off opdesc.add_options() ("help,h", "produces this help message") ("skip_runtime_fails", po::value(&skip_runtime_fails)->default_value(true), "Skip the test if there is a runtime failure.") ("transformType,t", po::value(&manual_params.transform_type) ->default_value(fft_transform_type_complex_forward), "Type of transform:\n0) complex forward\n1) complex inverse\n2) real " "forward\n3) real inverse") ("notInPlace,o", "Not in-place FFT transform (default: in-place)") ("callback", "Inject load/store callbacks") ("double", "Double precision transform (deprecated: use --precision double)") ("precision", po::value(&manual_params.precision), "Transform precision: single (default), double, half") ( "itype", po::value(&manual_params.itype) ->default_value(fft_array_type_unset), "Array type of input data:\n0) interleaved\n1) planar\n2) real\n3) " "hermitian interleaved\n4) hermitian planar") ( "otype", po::value(&manual_params.otype) ->default_value(fft_array_type_unset), "Array type of output data:\n0) interleaved\n1) planar\n2) real\n3) " "hermitian interleaved\n4) hermitian planar") ("length", po::value>(&manual_params.length)->multitoken(), "Lengths.") ( "batchSize,b", po::value(&manual_params.nbatch)->default_value(1), "If this value is greater than one, arrays will be used ") ("istride", po::value>(&manual_params.istride)->multitoken(), "Input stride.") ("ostride", po::value>(&manual_params.ostride)->multitoken(), "Output stride.") ("idist", po::value(&manual_params.idist)->default_value(0), "Logical distance between input batches.") ("odist", po::value(&manual_params.odist)->default_value(0), "Logical distance between output batches.") ("ioffset", po::value>(&manual_params.ioffset)->multitoken(), "Input offset.") ("ooffset", po::value>(&manual_params.ooffset)->multitoken(), "Output offset.") ("isize", po::value>(&manual_params.isize)->multitoken(), "Logical size of input buffer.") 
("osize", po::value>(&manual_params.osize)->multitoken(), "Logical size of output.") ("R", po::value(&ramgb)->default_value((start_memory.total_bytes + ONE_GiB - 1) / ONE_GiB), "Ram limit in GiB for tests.") ("V", po::value(&vramgb)->default_value(0), "vram limit in GiB for tests.") ("half_epsilon", po::value(&half_epsilon)->default_value(9.77e-4)) ("single_epsilon", po::value(&single_epsilon)->default_value(3.75e-5)) ("double_epsilon", po::value(&double_epsilon)->default_value(1e-15)) ("wise,w", "use FFTW wisdom") ("wisdomfile,W", po::value(&fftw_wisdom_filename)->default_value("wisdom3.txt"), "FFTW3 wisdom filename") ("scalefactor", po::value(&manual_params.scale_factor), "Scale factor to apply to output.") ("token", po::value(&test_token)->default_value(""), "Test token name for manual test") ("precompile", po::value(&precompile_file), "Precompile kernels to a file for all test cases before running tests"); // clang-format on po::store(po::parse_command_line(argc, argv, opdesc), vm); po::notify(vm); if(vm.count("help")) { std::cout << opdesc << std::endl; return 0; } std::cout << "half epsilon: " << half_epsilon << "\tsingle epsilon: " << single_epsilon << "\tdouble epsilon: " << double_epsilon << std::endl; manual_params.placement = vm.count("notInPlace") ? fft_placement_notinplace : fft_placement_inplace; if(vm.count("double")) manual_params.precision = fft_precision_double; if(vm.count("wise")) { use_fftw_wisdom = true; } if(vm.count("callback")) { manual_params.run_callbacks = true; } if(manual_params.length.empty()) { manual_params.length.push_back(8); // TODO: add random size? } if(manual_params.istride.empty()) { manual_params.istride.push_back(1); // TODO: add random size? } if(manual_params.ostride.empty()) { manual_params.ostride.push_back(1); // TODO: add random size? 
} // if precompiling, tell rocFFT to use the specified cache file // to write kernels to // // but if our environment already has a cache file for RTC, then // we should just use that std::unique_ptr env_precompile; if(!precompile_file.empty() && rocfft_getenv("ROCFFT_RTC_CACHE_PATH").empty()) { env_precompile = std::make_unique("ROCFFT_RTC_CACHE_PATH", precompile_file.c_str()); } // rocfft_setup(); // char v[256]; // rocfft_get_version_string(v, 256); // std::cout << "rocFFT version: " << v << std::endl; #ifdef FFTW_MULTITHREAD fftw_init_threads(); fftwf_init_threads(); fftw_plan_with_nthreads(rocfft_concurrency()); fftwf_plan_with_nthreads(rocfft_concurrency()); #endif if(use_fftw_wisdom) { if(verbose) { std::cout << "Using " << fftw_wisdom_filename << " wisdom file\n"; } std::ifstream fftw_wisdom_file(fftw_wisdom_filename); std::string allwisdom = std::string(std::istreambuf_iterator(fftw_wisdom_file), std::istreambuf_iterator()); std::string fftw_wisdom; std::string fftwf_wisdom; bool load_wisdom = false; bool load_fwisdom = false; std::istringstream input; input.str(allwisdom); // Separate the single-precision and double-precision wisdom: for(std::string line; std::getline(input, line);) { if(line.rfind("(fftw", 0) == 0 && line.find("fftw_wisdom") != std::string::npos) { load_wisdom = true; } if(line.rfind("(fftw", 0) == 0 && line.find("fftwf_wisdom") != std::string::npos) { load_fwisdom = true; } if(load_wisdom) { fftw_wisdom.append(line + "\n"); } if(load_fwisdom) { fftwf_wisdom.append(line + "\n"); } if(line.rfind(")", 0) == 0) { load_wisdom = false; load_fwisdom = false; } } fftw_import_wisdom_from_string(fftw_wisdom.c_str()); fftwf_import_wisdom_from_string(fftwf_wisdom.c_str()); } if(test_token != "") { std::cout << "Reading fft params from token:\n" << test_token << std::endl; try { manual_params.from_token(test_token); } catch(...) { std::cout << "Unable to parse token." 
<< std::endl; return 1; } } if(vm.count("precompile")) precompile_test_kernels(precompile_file); auto retval = RUN_ALL_TESTS(); if(use_fftw_wisdom) { std::string fftw_wisdom = std::string(fftw_export_wisdom_to_string()); std::string fftwf_wisdom = std::string(fftwf_export_wisdom_to_string()); fftw_wisdom.append(std::string(fftwf_export_wisdom_to_string())); std::ofstream fftw_wisdom_file(fftw_wisdom_filename); fftw_wisdom_file << fftw_wisdom; fftw_wisdom_file << fftwf_wisdom; fftw_wisdom_file.close(); } std::cout << "half precision max l-inf epsilon: " << max_linf_eps_half << std::endl; std::cout << "half precision max l2 epsilon: " << max_l2_eps_half << std::endl; std::cout << "single precision max l-inf epsilon: " << max_linf_eps_single << std::endl; std::cout << "single precision max l2 epsilon: " << max_l2_eps_single << std::endl; std::cout << "double precision max l-inf epsilon: " << max_linf_eps_double << std::endl; std::cout << "double precision max l2 epsilon: " << max_l2_eps_double << std::endl; // rocfft_cleanup(); return retval; } TEST(manual, vs_fftw) { // Run an individual test using the provided command-line parameters. std::cout << "Manual test:" << std::endl; manual_params.validate(); std::cout << "Token: " << manual_params.token() << std::endl; hipfft_params params(manual_params); fft_vs_reference(params, false); } hipFFT-rocm-5.7.1/clients/tests/hipfft_accuracy_test.cpp000066400000000000000000000527711445203054200233150ustar00rootroot00000000000000// Copyright (C) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include #include #include #include #include #include #include #include "hipfft.h" #include "../hipfft_params.h" #include "../rocFFT/clients/tests/fftw_transform.h" #include "../rocFFT/clients/tests/rocfft_accuracy_test.h" #include "../rocFFT/clients/tests/rocfft_against_fftw.h" #include "../rocFFT/shared/gpubuf.h" #include "../rocFFT/shared/rocfft_complex.h" void fft_vs_reference(hipfft_params& params, bool round_trip) { switch(params.precision) { case fft_precision_half: fft_vs_reference_impl<_Float16, hipfft_params>(params, round_trip); break; case fft_precision_single: fft_vs_reference_impl(params, round_trip); break; case fft_precision_double: fft_vs_reference_impl(params, round_trip); break; } } // Test for comparison between FFTW and hipFFT. 
TEST_P(accuracy_test, vs_fftw) { hipfft_params params(GetParam()); params.validate(); if(!params.valid(verbose)) { if(verbose) { std::cout << "Invalid parameters, skip this test." << std::endl; } GTEST_SKIP(); } if(!params.run_callbacks) fft_vs_reference(params, true); SUCCEED(); } #ifdef __HIP__ // load/store callbacks - cbdata in each is actually a scalar double // with a number to apply to each element template __host__ __device__ Tdata load_callback(Tdata* input, size_t offset, void* cbdata, void* sharedMem) { auto testdata = static_cast(cbdata); // multiply each element by scalar if(input == testdata->base) return input[offset] * testdata->scalar; // wrong base address passed, return something obviously wrong else { // wrong base address passed, return something obviously wrong return input[0]; } } __device__ auto load_callback_dev_half = load_callback<_Float16>; __device__ auto load_callback_dev_complex_half = load_callback>; __device__ auto load_callback_dev_float = load_callback; __device__ auto load_callback_dev_complex_float = load_callback>; __device__ auto load_callback_dev_double = load_callback; __device__ auto load_callback_dev_complex_double = load_callback>; void* get_load_callback_host(fft_array_type itype, fft_precision precision, bool round_trip_inverse = false) { void* load_callback_host = nullptr; switch(itype) { case fft_array_type_complex_interleaved: case fft_array_type_hermitian_interleaved: { switch(precision) { case fft_precision_half: EXPECT_EQ(hipMemcpyFromSymbol(&load_callback_host, HIP_SYMBOL(load_callback_dev_complex_half), sizeof(void*)), hipSuccess); return load_callback_host; case fft_precision_single: EXPECT_EQ(hipMemcpyFromSymbol(&load_callback_host, HIP_SYMBOL(load_callback_dev_complex_float), sizeof(void*)), hipSuccess); return load_callback_host; case fft_precision_double: EXPECT_EQ(hipMemcpyFromSymbol(&load_callback_host, HIP_SYMBOL(load_callback_dev_complex_double), sizeof(void*)), hipSuccess); return load_callback_host; } 
} case fft_array_type_real: { switch(precision) { case fft_precision_half: EXPECT_EQ(hipMemcpyFromSymbol( &load_callback_host, HIP_SYMBOL(load_callback_dev_half), sizeof(void*)), hipSuccess); return load_callback_host; case fft_precision_single: EXPECT_EQ(hipMemcpyFromSymbol( &load_callback_host, HIP_SYMBOL(load_callback_dev_float), sizeof(void*)), hipSuccess); return load_callback_host; case fft_precision_double: EXPECT_EQ(hipMemcpyFromSymbol( &load_callback_host, HIP_SYMBOL(load_callback_dev_double), sizeof(void*)), hipSuccess); return load_callback_host; } } default: // planar is unsupported for now return load_callback_host; } } template __host__ __device__ static void store_callback(Tdata* output, size_t offset, Tdata element, void* cbdata, void* sharedMem) { auto testdata = static_cast(cbdata); // add scalar to each element if(output == testdata->base) { output[offset] = element + testdata->scalar; } // otherwise, wrong base address passed, just don't write } __device__ auto store_callback_dev_half = store_callback<_Float16>; __device__ auto store_callback_dev_complex_half = store_callback>; __device__ auto store_callback_dev_float = store_callback; __device__ auto store_callback_dev_complex_float = store_callback>; __device__ auto store_callback_dev_double = store_callback; __device__ auto store_callback_dev_complex_double = store_callback>; void* get_store_callback_host(fft_array_type otype, fft_precision precision, bool round_trip_inverse = false) { void* store_callback_host = nullptr; switch(otype) { case fft_array_type_complex_interleaved: case fft_array_type_hermitian_interleaved: { switch(precision) { case fft_precision_half: EXPECT_EQ(hipMemcpyFromSymbol(&store_callback_host, HIP_SYMBOL(store_callback_dev_complex_half), sizeof(void*)), hipSuccess); return store_callback_host; case fft_precision_single: EXPECT_EQ(hipMemcpyFromSymbol(&store_callback_host, HIP_SYMBOL(store_callback_dev_complex_float), sizeof(void*)), hipSuccess); return 
store_callback_host; case fft_precision_double: EXPECT_EQ(hipMemcpyFromSymbol(&store_callback_host, HIP_SYMBOL(store_callback_dev_complex_double), sizeof(void*)), hipSuccess); return store_callback_host; } } case fft_array_type_real: { switch(precision) { case fft_precision_half: EXPECT_EQ(hipMemcpyFromSymbol( &store_callback_host, HIP_SYMBOL(store_callback_dev_half), sizeof(void*)), hipSuccess); return store_callback_host; case fft_precision_single: EXPECT_EQ(hipMemcpyFromSymbol(&store_callback_host, HIP_SYMBOL(store_callback_dev_float), sizeof(void*)), hipSuccess); return store_callback_host; case fft_precision_double: EXPECT_EQ(hipMemcpyFromSymbol(&store_callback_host, HIP_SYMBOL(store_callback_dev_double), sizeof(void*)), hipSuccess); return store_callback_host; } } default: // planar is unsupported for now return store_callback_host; } } // implement result scaling as a store callback, as rocFFT tests do void apply_store_callback(const fft_params& params, std::vector& output) { if(!params.run_callbacks && params.scale_factor == 1.0) return; callback_test_data cbdata; cbdata.scalar = params.store_cb_scalar; cbdata.base = output.front().data(); switch(params.otype) { case fft_array_type_complex_interleaved: case fft_array_type_hermitian_interleaved: { switch(params.precision) { case fft_precision_half: { const size_t elem_size = sizeof(std::complex<_Float16>); const size_t num_elems = output.front().size() / elem_size; auto output_begin = reinterpret_cast*>(output.front().data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; if(params.run_callbacks) store_callback(output_begin, i, element, &cbdata, nullptr); } break; } case fft_precision_single: { const size_t elem_size = sizeof(std::complex); const size_t num_elems = output.front().size() / elem_size; auto output_begin = reinterpret_cast*>(output.front().data()); for(size_t i = 0; i < num_elems; ++i) { auto& 
element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; if(params.run_callbacks) store_callback(output_begin, i, element, &cbdata, nullptr); } break; } case fft_precision_double: { const size_t elem_size = sizeof(std::complex); const size_t num_elems = output.front().size() / elem_size; auto output_begin = reinterpret_cast*>(output.front().data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; if(params.run_callbacks) store_callback(output_begin, i, element, &cbdata, nullptr); } break; } } } break; case fft_array_type_complex_planar: case fft_array_type_hermitian_planar: { // planar wouldn't run callbacks, but we could still want scaling switch(params.precision) { case fft_precision_half: { const size_t elem_size = sizeof(std::complex<_Float16>); for(auto& buf : output) { const size_t num_elems = buf.size() / elem_size; auto output_begin = reinterpret_cast*>(buf.data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; } } break; } case fft_precision_single: { const size_t elem_size = sizeof(std::complex); for(auto& buf : output) { const size_t num_elems = buf.size() / elem_size; auto output_begin = reinterpret_cast*>(buf.data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; } } break; } case fft_precision_double: { const size_t elem_size = sizeof(std::complex); for(auto& buf : output) { const size_t num_elems = buf.size() / elem_size; auto output_begin = reinterpret_cast*>(buf.data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; } } break; } } } break; case fft_array_type_real: { switch(params.precision) { case fft_precision_half: { 
const size_t elem_size = sizeof(_Float16); const size_t num_elems = output.front().size() / elem_size; auto output_begin = reinterpret_cast<_Float16*>(output.front().data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; if(params.run_callbacks) store_callback(output_begin, i, element, &cbdata, nullptr); } break; } case fft_precision_single: { const size_t elem_size = sizeof(float); const size_t num_elems = output.front().size() / elem_size; auto output_begin = reinterpret_cast(output.front().data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; if(params.run_callbacks) store_callback(output_begin, i, element, &cbdata, nullptr); } break; } case fft_precision_double: { const size_t elem_size = sizeof(double); const size_t num_elems = output.front().size() / elem_size; auto output_begin = reinterpret_cast(output.front().data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; if(params.scale_factor != 1.0) element = element * params.scale_factor; if(params.run_callbacks) store_callback(output_begin, i, element, &cbdata, nullptr); } break; } } } break; default: // this is FFTW data which should always be interleaved (if complex) abort(); } } // apply load callback if necessary void apply_load_callback(const fft_params& params, std::vector& input) { if(!params.run_callbacks) return; // we're applying callbacks to FFTW input/output which we can // assume is contiguous and non-planar callback_test_data cbdata; cbdata.scalar = params.load_cb_scalar; cbdata.base = input.front().data(); switch(params.itype) { case fft_array_type_complex_interleaved: case fft_array_type_hermitian_interleaved: { switch(params.precision) { case fft_precision_half: { const size_t elem_size = sizeof(std::complex<_Float16>); const size_t num_elems = input.front().size() / 
elem_size; auto input_begin = reinterpret_cast*>(input.front().data()); for(size_t i = 0; i < num_elems; ++i) { input_begin[i] = load_callback(input_begin, i, &cbdata, nullptr); } break; } case fft_precision_single: { const size_t elem_size = sizeof(std::complex); const size_t num_elems = input.front().size() / elem_size; auto input_begin = reinterpret_cast*>(input.front().data()); for(size_t i = 0; i < num_elems; ++i) { input_begin[i] = load_callback(input_begin, i, &cbdata, nullptr); } break; } case fft_precision_double: { const size_t elem_size = sizeof(std::complex); const size_t num_elems = input.front().size() / elem_size; auto input_begin = reinterpret_cast*>(input.front().data()); for(size_t i = 0; i < num_elems; ++i) { input_begin[i] = load_callback(input_begin, i, &cbdata, nullptr); } break; } } } break; case fft_array_type_real: { switch(params.precision) { case fft_precision_half: { const size_t elem_size = sizeof(_Float16); const size_t num_elems = input.front().size() / elem_size; auto input_begin = reinterpret_cast<_Float16*>(input.front().data()); for(size_t i = 0; i < num_elems; ++i) { input_begin[i] = load_callback(input_begin, i, &cbdata, nullptr); } break; } case fft_precision_single: { const size_t elem_size = sizeof(float); const size_t num_elems = input.front().size() / elem_size; auto input_begin = reinterpret_cast(input.front().data()); for(size_t i = 0; i < num_elems; ++i) { input_begin[i] = load_callback(input_begin, i, &cbdata, nullptr); } break; } case fft_precision_double: { const size_t elem_size = sizeof(double); const size_t num_elems = input.front().size() / elem_size; auto input_begin = reinterpret_cast(input.front().data()); for(size_t i = 0; i < num_elems; ++i) { input_begin[i] = load_callback(input_begin, i, &cbdata, nullptr); } break; } } } break; default: // this is FFTW data which should always be interleaved (if complex) abort(); } } #else // Stubs for callback tests. 
// Many seem to be called unconditionally, so we can't throw exceptions in // most cases. void* get_load_callback_host(fft_array_type itype, fft_precision precision, bool round_trip_inverse = false) { return nullptr; } void apply_load_callback(const fft_params& params, std::vector& input) {} // implement result scaling as a store callback, as rocFFT tests do void apply_store_callback(const fft_params& params, std::vector& output) { if(params.scale_factor == 1.0) return; switch(params.precision) { case fft_precision_half: { const size_t elem_size = sizeof(_Float16); for(auto& buf : output) { const size_t num_elems = buf.size() / elem_size; auto output_begin = reinterpret_cast<_Float16*>(buf.data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; element = static_cast(element) * params.scale_factor; } } break; } case fft_precision_single: { const size_t elem_size = sizeof(float); for(auto& buf : output) { const size_t num_elems = buf.size() / elem_size; auto output_begin = reinterpret_cast(buf.data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; element = element * params.scale_factor; } } break; } case fft_precision_double: { const size_t elem_size = sizeof(double); for(auto& buf : output) { const size_t num_elems = buf.size() / elem_size; auto output_begin = reinterpret_cast(buf.data()); for(size_t i = 0; i < num_elems; ++i) { auto& element = output_begin[i]; element = element * params.scale_factor; } } break; } } } void* get_store_callback_host(fft_array_type otype, fft_precision precision, bool round_trip_inverse = false) { throw std::runtime_error("get_store_callback_host not implemented"); return nullptr; } #endif hipFFT-rocm-5.7.1/clients/tests/hipfft_accuracy_test.h000066400000000000000000000025561445203054200227560ustar00rootroot00000000000000// Copyright (C) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #pragma once #ifndef ROCFFT_ACCURACY_TEST #define ROCFFT_ACCURACY_TEST #include "../hipfft_params.h" #include "../rocFFT/clients/tests/accuracy_test.h" void fft_vs_reference(hipfft_params& params, bool round_trip = false); #endif hipFFT-rocm-5.7.1/clients/tests/hipfft_test_params.h000066400000000000000000000024411445203054200224400ustar00rootroot00000000000000// Copyright (C) 2022 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #pragma once #ifndef TESTCONSTANTS_H #define TESTCONSTANTS_H #include "hipfft.h" #include extern int verbose; extern size_t ramgb; #endif hipFFT-rocm-5.7.1/clients/tests/simple_test.cpp000066400000000000000000000573641445203054200214570ustar00rootroot00000000000000// Copyright (c) 2018 - 2022 Advanced Micro Devices, Inc. All rights // reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include "hipfft.h" #include #include #include #include #include "../hipfft_params.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS DISABLE_WARNING_RETURN_TYPE #include DISABLE_WARNING_POP // Function to return maximum error for float and double types. 
template inline double type_epsilon(); template <> inline double type_epsilon() { return 1e-6; } template <> inline double type_epsilon() { return 1e-7; } TEST(hipfftTest, Create1dPlan) { hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); size_t length = 1024; ASSERT_EQ(hipfftPlan1d(&plan, length, HIPFFT_C2C, 1), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } TEST(hipfftTest, CreatePlanMany) { int const rank = 3; int const nX = 64; int const nY = 128; int const nZ = 23; int n[3] = {nX, nY, nZ}; int inembed[3] = {nX, nY, nZ}; int* inembed_null = nullptr; int const istride = 1; int const idist = nX * nY * nZ; int onembed[3] = {nX, nY, nZ}; int* onembed_null = nullptr; int const ostride = 1; int const odist = nX * nY * nZ; hipfftType type = HIPFFT_C2C; int const batch = 1000; size_t workSize; // Tests plan creation with null and not null // combinations of inembed and onembed. // // Valid combinations: // inembed == null && onembed == null // or // inembed != null && onembed != null // // otherwise HIPFFT_INVALID_VALUE should be // returned to maintain compatibility with cuFFT // inembed == null && onembed == null { hipfftHandle plan_valid_1 = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan_valid_1), HIPFFT_SUCCESS); auto ret_hipfft = hipfftMakePlanMany(plan_valid_1, rank, (int*)n, inembed_null, istride, idist, onembed_null, ostride, odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_SUCCESS) << "inembed == null && onembed == null failed: " << hipfftResult_string(ret_hipfft); ASSERT_EQ(hipfftSetAutoAllocation(plan_valid_1, 0), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan_valid_1), HIPFFT_SUCCESS); } // inembed != null && onembed != null { hipfftHandle plan_valid_2 = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan_valid_2), HIPFFT_SUCCESS); auto ret_hipfft = hipfftMakePlanMany(plan_valid_2, rank, (int*)n, inembed, istride, idist, onembed, ostride, 
odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_SUCCESS) << "inembed != null && onembed != null failed: " << hipfftResult_string(ret_hipfft); ASSERT_EQ(hipfftSetAutoAllocation(plan_valid_2, 0), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan_valid_2), HIPFFT_SUCCESS); } // inembed != null && onembed == null { hipfftHandle plan_invalid_1 = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan_invalid_1), HIPFFT_SUCCESS); auto ret_hipfft = hipfftMakePlanMany(plan_invalid_1, rank, (int*)n, inembed, istride, idist, onembed_null, ostride, odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_INVALID_VALUE) << "inembed != null && onembed == null failed: " << hipfftResult_string(ret_hipfft); ASSERT_EQ(hipfftDestroy(plan_invalid_1), HIPFFT_SUCCESS); } // inembed == null && onembed != null { hipfftHandle plan_invalid_2 = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan_invalid_2), HIPFFT_SUCCESS); auto ret_hipfft = hipfftMakePlanMany(plan_invalid_2, rank, (int*)n, inembed_null, istride, idist, onembed, ostride, odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_INVALID_VALUE) << "inembed == null && onembed != null failed: " << hipfftResult_string(ret_hipfft); ASSERT_EQ(hipfftDestroy(plan_invalid_2), HIPFFT_SUCCESS); } } TEST(hipfftTest, CreatePlanMany64) { int const rank = 3; long long int const nX = 64; long long int const nY = 128; long long int const nZ = 23; long long int n[3] = {nX, nY, nZ}; long long int n_invalid[3] = {nX, -nY, nZ}; long long int inembed[3] = {nX, nY, nZ}; long long int const istride = 1; long long int const idist = nX * nY * nZ; long long int onembed[3] = {nX, nY, nZ}; long long int onembed_invalid[3] = {nX, nY, -nZ}; long long int const ostride = 1; long long int const odist = nX * nY * nZ; hipfftType type = HIPFFT_C2C; long long int const batch = 1000; long long int const batch_invalid = -2; size_t workSize; // Tests the 64-bit version of plan creation // with valid/invalid data layouts. 
// First test with a valid data layout { hipfftHandle plan_valid = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan_valid), HIPFFT_SUCCESS); auto ret_hipfft = hipfftMakePlanMany64(plan_valid, rank, (long long int*)n, inembed, istride, idist, onembed, ostride, odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_SUCCESS); ASSERT_EQ(hipfftSetAutoAllocation(plan_valid, 0), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan_valid), HIPFFT_SUCCESS); } // invalid data layout (n array has a negative entry) { hipfftHandle plan_invalid_1 = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan_invalid_1), HIPFFT_SUCCESS); auto ret_hipfft = hipfftMakePlanMany64(plan_invalid_1, rank, (long long int*)n_invalid, inembed, istride, idist, onembed, ostride, odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_INVALID_VALUE); ASSERT_EQ(hipfftSetAutoAllocation(plan_invalid_1, 0), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan_invalid_1), HIPFFT_SUCCESS); } // invalid data layout (onembed array has a negative entry) { hipfftHandle plan_invalid_2 = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan_invalid_2), HIPFFT_SUCCESS); auto ret_hipfft = hipfftMakePlanMany64(plan_invalid_2, rank, (long long int*)n, inembed, istride, idist, onembed_invalid, ostride, odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_INVALID_SIZE); ASSERT_EQ(hipfftSetAutoAllocation(plan_invalid_2, 0), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan_invalid_2), HIPFFT_SUCCESS); } // invalid data layout (batch is negative) { hipfftHandle plan_invalid_3 = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan_invalid_3), HIPFFT_SUCCESS); auto ret_hipfft = hipfftMakePlanMany64(plan_invalid_3, rank, (long long int*)n, inembed, istride, idist, onembed, ostride, odist, type, batch_invalid, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_INVALID_SIZE); ASSERT_EQ(hipfftSetAutoAllocation(plan_invalid_3, 0), HIPFFT_SUCCESS); 
ASSERT_EQ(hipfftDestroy(plan_invalid_3), HIPFFT_SUCCESS); } } TEST(hipfftTest, hipfftGetSizeMany) { int const rank = 3; int const nX = 33; int const nY = 128; int const nZ = 100; int n[3] = {nX, nY, nZ}; int inembed[3] = {nX, nY, nZ}; int const istride = 1; int const idist = nX * nY * nZ; int onembed[3] = {nX, nY, nZ}; int const ostride = 1; int const odist = nX * nY * nZ; hipfftType type = HIPFFT_C2C; int const batch = 1; size_t workSize; hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); auto ret_hipfft = hipfftGetSizeMany(plan, rank, (int*)n, inembed, istride, idist, onembed, ostride, odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_SUCCESS); ASSERT_EQ(hipfftSetAutoAllocation(plan, 0), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } TEST(hipfftTest, hipfftGetSizeMany64) { int const rank = 3; long long int const nX = 133; long long int const nY = 354; long long int const nZ = 256; long long int n[3] = {nX, nY, nZ}; long long int inembed[3] = {nX, nY, nZ}; long long int const istride = 1; long long int const idist = nX * nY * nZ; long long int onembed[3] = {nX, nY, nZ}; long long int const ostride = 1; long long int const odist = nX * nY * nZ; hipfftType type = HIPFFT_C2C; long long int const batch = 2; size_t workSize; hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); auto ret_hipfft = hipfftGetSizeMany64(plan, rank, (long long int*)n, inembed, istride, idist, onembed, ostride, odist, type, batch, &workSize); ASSERT_EQ(ret_hipfft, HIPFFT_SUCCESS); ASSERT_EQ(hipfftSetAutoAllocation(plan, 0), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } TEST(hipfftTest, CheckBufferSizeC2C) { hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); size_t n = 1024; size_t workSize = 0; ASSERT_EQ(hipfftMakePlan1d(plan, n, HIPFFT_C2C, 1, &workSize), HIPFFT_SUCCESS); #ifdef 
__HIP_PLATFORM_AMD__ // No extra work buffer for C2C EXPECT_EQ(workSize, 0); #endif ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } TEST(hipfftTest, CheckBufferSizeR2C) { hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); size_t n = 2048; size_t workSize = 0; ASSERT_EQ(hipfftMakePlan1d(plan, n, HIPFFT_R2C, 1, &workSize), HIPFFT_SUCCESS); #ifdef __HIP_PLATFORM_AMD__ // NOTE: keep this condition for ease of changing n for ad-hoc tests // // cppcheck-suppress knownConditionTrueFalse if(n % 2 == 0) { EXPECT_EQ(workSize, 0); } else { EXPECT_EQ(workSize, 2 * n * sizeof(float)); } #endif EXPECT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } TEST(hipfftTest, CheckBufferSizeC2R) { hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); size_t n = 2048; size_t workSize = 0; ASSERT_EQ(hipfftMakePlan1d(plan, n, HIPFFT_C2R, 1, &workSize), HIPFFT_SUCCESS); #ifdef __HIP_PLATFORM_AMD__ // NOTE: keep this condition for ease of changing n for ad-hoc tests // // cppcheck-suppress knownConditionTrueFalse if(n % 2 == 0) { EXPECT_EQ(workSize, 0); } else { EXPECT_EQ(workSize, 2 * n * sizeof(float)); } #endif ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } TEST(hipfftTest, CheckBufferSizeD2Z) { hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); size_t n = 2048; size_t batch = 1000; size_t workSize = 0; ASSERT_EQ(hipfftMakePlan1d(plan, n, HIPFFT_D2Z, batch, &workSize), HIPFFT_SUCCESS); #ifdef __HIP_PLATFORM_AMD__ // NOTE: keep this condition for ease of changing n for ad-hoc tests // // cppcheck-suppress knownConditionTrueFalse if(n % 2 == 0) { EXPECT_EQ(workSize, 0); } else { EXPECT_EQ(workSize, 2 * n * sizeof(double)); } #endif ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } TEST(hipfftTest, CheckBufferSizeZ2D) { hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); size_t n = 2048; 
size_t batch = 1000; size_t workSize = 0; ASSERT_EQ(hipfftMakePlan1d(plan, n, HIPFFT_Z2D, batch, &workSize), HIPFFT_SUCCESS); #ifdef __HIP_PLATFORM_AMD__ // NOTE: keep this condition for ease of changing n for ad-hoc tests // // cppcheck-suppress knownConditionTrueFalse if(n % 2 == 0) { EXPECT_EQ(workSize, 0); } else { EXPECT_EQ(workSize, 2 * n * sizeof(double)); } #endif ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } #ifdef __HIP_PLATFORM_AMD__ TEST(hipfftTest, CheckNullWorkBuffer) { hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); size_t n = 2048; size_t batch = 1000; size_t workSize = 0; ASSERT_EQ(hipfftMakePlan1d(plan, n, HIPFFT_Z2D, batch, &workSize), HIPFFT_SUCCESS); EXPECT_EQ(hipfftSetWorkArea(plan, nullptr), HIPFFT_SUCCESS); ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); } #endif TEST(hipfftTest, RunR2C) { const size_t N = 4096; float in[N]; for(size_t i = 0; i < N; i++) in[i] = i + (i % 3) - (i % 7); hipfftReal* d_in; hipfftComplex* d_out; ASSERT_EQ(hipMalloc(&d_in, N * sizeof(hipfftReal)), hipSuccess); ASSERT_EQ(hipMalloc(&d_out, (N / 2 + 1) * sizeof(hipfftComplex)), hipSuccess); ASSERT_EQ(hipMemcpy(d_in, in, N * sizeof(hipfftReal), hipMemcpyHostToDevice), hipSuccess); hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); size_t workSize; ASSERT_EQ(hipfftMakePlan1d(plan, N, HIPFFT_R2C, 1, &workSize), HIPFFT_SUCCESS); EXPECT_EQ(hipfftExecR2C(plan, d_in, d_out), HIPFFT_SUCCESS); std::vector out(N / 2 + 1); ASSERT_EQ(hipMemcpy(&out[0], d_out, (N / 2 + 1) * sizeof(hipfftComplex), hipMemcpyDeviceToHost), hipSuccess); ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); ASSERT_EQ(hipFree(d_in), hipSuccess); ASSERT_EQ(hipFree(d_out), hipSuccess); ; // NOTE: keep this condition for ease of changing n for ad-hoc tests // // cppcheck-suppress knownConditionTrueFalse if(N % 2 != 0) { EXPECT_TRUE(workSize != 0); } double ref_in[N]; for(size_t i = 0; i < N; i++) 
ref_in[i] = in[i]; fftw_complex* ref_out; fftw_plan ref_p; ref_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * (N / 2 + 1)); ref_p = fftw_plan_dft_r2c_1d(N, ref_in, ref_out, FFTW_ESTIMATE); fftw_execute(ref_p); double maxv = 0; double nrmse = 0; // normalized root mean square error for(size_t i = 0; i < (N / 2 + 1); i++) { // printf("element %d: FFTW result %f, %f; hipFFT result %f, %f \n", (int)i, ref_out[i][0], ref_out[i][1], out[i].x, out[i].y); double dr = ref_out[i][0] - out[i].x; double di = ref_out[i][1] - out[i].y; maxv = fabs(ref_out[i][0]) > maxv ? fabs(ref_out[i][0]) : maxv; maxv = fabs(ref_out[i][1]) > maxv ? fabs(ref_out[i][1]) : maxv; nrmse += ((dr * dr) + (di * di)); } nrmse /= (double)((N / 2 + 1)); nrmse = sqrt(nrmse); nrmse /= maxv; EXPECT_TRUE(nrmse < type_epsilon()); fftw_free(ref_out); } // ask for a transform whose parameters are only valid out-of-place. // since hipFFT generates both in-place and out-place plans up front // (because it's not told about the placement until exec time), this // ensures that a failure to create an in-place plan doesn't prevent // the out-place plan from working. 
TEST(hipfftTest, OutplaceOnly) { int N_in = 4; int N_out = N_in / 2 + 1; float in[N_in]; for(int i = 0; i < N_in; i++) in[i] = i + (i % 3) - (i % 7); hipfftReal* d_in; hipfftComplex* d_out; ASSERT_EQ(hipMalloc(&d_in, N_in * sizeof(hipfftReal)), hipSuccess); ASSERT_EQ(hipMalloc(&d_out, N_out * sizeof(hipfftComplex)), hipSuccess); ASSERT_EQ(hipMemcpy(d_in, in, N_in * sizeof(hipfftReal), hipMemcpyHostToDevice), hipSuccess); hipfftHandle plan = hipfft_params::INVALID_PLAN_HANDLE; ASSERT_EQ(hipfftCreate(&plan), HIPFFT_SUCCESS); ASSERT_EQ(hipfftPlanMany(&plan, 1, &N_in, &N_in, 1, N_in, &N_out, 1, N_out, HIPFFT_R2C, 1), HIPFFT_SUCCESS); ASSERT_EQ(plan == hipfft_params::INVALID_PLAN_HANDLE, false); ASSERT_EQ(hipfftExecR2C(plan, d_in, d_out), HIPFFT_SUCCESS) << "hipfftExecR2C failed"; std::vector out(N_out); ASSERT_EQ(hipMemcpy(out.data(), d_out, N_out * sizeof(hipfftComplex), hipMemcpyDeviceToHost), hipSuccess); // in-place transform isn't really *supposed* to work - this // might or might not fail but we can at least check that it // doesn't blow up. //hipfftExecR2C(plan, reinterpret_cast(d_out), d_out); ASSERT_EQ(hipfftDestroy(plan), HIPFFT_SUCCESS); ASSERT_EQ(hipFree(d_in), hipSuccess); ASSERT_EQ(hipFree(d_out), hipSuccess); double ref_in[N_in]; for(int i = 0; i < N_in; i++) ref_in[i] = in[i]; fftw_complex* ref_out; fftw_plan ref_p; ref_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * N_out); ref_p = fftw_plan_dft_r2c_1d(N_in, ref_in, ref_out, FFTW_ESTIMATE); fftw_execute(ref_p); double maxv = 0; double nrmse = 0; // normalized root mean square error for(int i = 0; i < N_out; i++) { // printf("element %d: FFTW result %f, %f; hipFFT result %f, %f \n", (int)i, ref_out[i][0], ref_out[i][1], out[i].x, out[i].y); double dr = ref_out[i][0] - out[i].x; double di = ref_out[i][1] - out[i].y; maxv = fabs(ref_out[i][0]) > maxv ? fabs(ref_out[i][0]) : maxv; maxv = fabs(ref_out[i][1]) > maxv ? 
fabs(ref_out[i][1]) : maxv; nrmse += ((dr * dr) + (di * di)); } nrmse /= (double)(N_out); nrmse = sqrt(nrmse); nrmse /= maxv; ASSERT_TRUE(nrmse < type_epsilon()); fftw_free(ref_out); } hipFFT-rocm-5.7.1/cmake/000077500000000000000000000000001445203054200146615ustar00rootroot00000000000000hipFFT-rocm-5.7.1/cmake/dependencies.cmake000066400000000000000000000072131445203054200203140ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
# # ############################################################################# # Git find_package(Git REQUIRED) # HIP if( NOT CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) if( NOT BUILD_WITH_LIB STREQUAL "CUDA" ) if( WIN32 ) find_package( HIP CONFIG REQUIRED ) else() find_package( HIP MODULE REQUIRED ) endif() list( APPEND HIP_INCLUDE_DIRS "${HIP_ROOT_DIR}/include" ) endif() else() if( BUILD_WITH_LIB STREQUAL "CUDA" ) set(HIP_INCLUDE_DIRS "${HIP_ROOT_DIR}/include") else() if( WIN32 ) find_package( HIP CONFIG REQUIRED ) else() find_package( HIP MODULE REQUIRED ) endif() endif() endif() # Either rocfft or cufft is required if(NOT BUILD_WITH_LIB STREQUAL "CUDA") find_package(rocfft REQUIRED) else() find_package(CUDA REQUIRED) endif() # ROCm find_package( ROCM 0.7.3 CONFIG PATHS /opt/rocm ) if(NOT ROCM_FOUND) set( rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download" ) set( PROJECT_EXTERN_DIR "${CMAKE_CURRENT_BINARY_DIR}/extern" ) file( DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip STATUS status LOG log) list(GET status 0 status_code) list(GET status 1 status_string) if(NOT status_code EQUAL 0) message(WARNING "error: downloading 'https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip' failed status_code: ${status_code} status_string: ${status_string} log: ${log} ") else() message(STATUS "downloading... done") execute_process( COMMAND ${CMAKE_COMMAND} -E tar xzvf ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.zip WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} ) execute_process( COMMAND ${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=${PROJECT_EXTERN_DIR}/rocm-cmake . 
WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag} ) execute_process( COMMAND ${CMAKE_COMMAND} --build rocm-cmake-${rocm_cmake_tag} --target install WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}) find_package( ROCM 0.7.3 CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake ) endif() endif() if( ROCM_FOUND ) message(STATUS "Found ROCm") include(ROCMSetupVersion) include(ROCMCreatePackage) include(ROCMInstallTargets) include(ROCMPackageConfigHelpers) include(ROCMInstallSymlinks) include(ROCMClients) include(ROCMHeaderWrapper) else() message(WARNING "Could not find rocm-cmake, packaging will fail.") endif( ) hipFFT-rocm-5.7.1/cmake/get-cli-arguments.cmake000066400000000000000000000044221445203054200212140ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
# # ############################################################################# # Attempt (best effort) to return a list of user specified parameters cmake was invoked with # NOTE: Even if the user specifies CMAKE_INSTALL_PREFIX on the command line, the parameter is # not returned because it does not have the matching helpstring function( append_cmake_cli_arguments initial_cli_args return_cli_args ) # Retrieves the contents of CMakeCache.txt get_cmake_property( cmake_properties CACHE_VARIABLES ) foreach( property ${cmake_properties} ) get_property(help_string CACHE ${property} PROPERTY HELPSTRING ) # Properties specified on the command line have boilerplate text if( help_string MATCHES "variable specified on the command line" ) # message( STATUS "property: ${property}") # message( STATUS "value: ${${property}}") list( APPEND cli_args "-D${property}=${${property}}") endif( ) endforeach( ) # message( STATUS "get_command_line_arguments: ${cli_args}") set( ${return_cli_args} ${${initial_cli_args}} ${cli_args} PARENT_SCOPE ) endfunction( )hipFFT-rocm-5.7.1/cmake/package-functions.cmake000066400000000000000000000043271445203054200212720ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. 
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# #############################################################################

# ########################################################################
# Generates the postinst / prerm packaging scripts that register and
# unregister the library directory with the dynamic loader (ldconfig).
#
# Arguments:
#   scripts_write_dir - directory that receives the 'postinst' and 'prerm' files
#   library_name      - used to name /etc/ld.so.conf.d/<library_name>-dev.conf
#   library_link_name - accepted for interface compatibility; not referenced here
#
# Also reads CPACK_PACKAGING_INSTALL_PREFIX and LIB_INSTALL_DIR from the
# calling scope when composing the path written into the conf file.
# ########################################################################
function( write_rocm_package_script_files scripts_write_dir library_name library_link_name )

  # ld.so drop-in file created on install and deleted on removal
  set( ldconf_path "/etc/ld.so.conf.d/${library_name}-dev.conf" )

  # Post-install: on 'configure', record the library directory and refresh the loader cache
  set( postinst_text
"#!/bin/bash

set -e

do_ldconfig() {
  echo ${CPACK_PACKAGING_INSTALL_PREFIX}/${LIB_INSTALL_DIR} > ${ldconf_path} && ldconfig
}

case \"\$1\" in
  configure)
    do_ldconfig
  ;;
  abort-upgrade|abort-remove|abort-deconfigure)
    echo \"\$1\"
  ;;
  *)
    exit 0
  ;;
esac
" )

  # Pre-remove: on 'remove' or 'purge', delete the drop-in and refresh the loader cache
  set( prerm_text
"#!/bin/bash

set -e

rm_ldconfig() {
  rm -f ${ldconf_path} && ldconfig
}

case \"\$1\" in
  remove|purge)
    rm_ldconfig
  ;;
  *)
    exit 0
  ;;
esac
" )

  file( WRITE ${scripts_write_dir}/postinst "${postinst_text}" )
  file( WRITE ${scripts_write_dir}/prerm "${prerm_text}" )
endfunction( )
hipFFT-rocm-5.7.1/cmake/verbose.cmake000066400000000000000000000062641445203054200173400ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved.
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
#
# #############################################################################

# Diagnostic dump of the main configuration variables; this script only prints.
# Every label names the variable whose value follows it, so the log can be
# searched by variable name.

message(STATUS "hipfft_VERSION : ${hipfft_VERSION}")
message(STATUS "\t==>CMAKE_BUILD_TYPE : ${CMAKE_BUILD_TYPE}")
message(STATUS "\t==>BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}")
message(STATUS "\t==>CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}")
message(STATUS "\t==>CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}")
message(STATUS "\t==>CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}")

message(STATUS "==============")
message(STATUS "\t==>CMAKE_SYSTEM_NAME : ${CMAKE_SYSTEM_NAME}")
message(STATUS "\t==>HIP_ROOT_DIR : ${HIP_ROOT_DIR}")
# Compiler and flags are reported separately; the flags used to be printed under a compiler label
message(STATUS "\t==>CMAKE_CXX_COMPILER : ${CMAKE_CXX_COMPILER}")
message(STATUS "\t==>CMAKE_CXX_COMPILER_VERSION : ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS "\t==>CMAKE_CXX_FLAGS : ${CMAKE_CXX_FLAGS}")
message(STATUS "\t==>CMAKE_CXX_FLAGS_DEBUG : ${CMAKE_CXX_FLAGS_DEBUG}")
message(STATUS "\t==>CMAKE_CXX_FLAGS_RELEASE : ${CMAKE_CXX_FLAGS_RELEASE}")
message(STATUS "\t==>CMAKE_CXX_FLAGS_RELWITHDEBINFO : ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
message(STATUS "\t==>CMAKE_EXE_LINKER_FLAGS : ${CMAKE_EXE_LINKER_FLAGS}")
message(STATUS "\t==>CMAKE_EXE_LINKER_FLAGS_RELEASE : ${CMAKE_EXE_LINKER_FLAGS_RELEASE}")

message(STATUS "==============")
message(STATUS "\t==>CMAKE_SHARED_LIBRARY_C_FLAGS : ${CMAKE_SHARED_LIBRARY_C_FLAGS}")
message(STATUS "\t==>CMAKE_SHARED_LIBRARY_CXX_FLAGS : ${CMAKE_SHARED_LIBRARY_CXX_FLAGS}")
# Shared linker flags are printed once (they were previously listed in two places)
message(STATUS "\t==>CMAKE_SHARED_LINKER_FLAGS : ${CMAKE_SHARED_LINKER_FLAGS}")
message(STATUS "\t==>CMAKE_SHARED_LINKER_FLAGS_DEBUG : ${CMAKE_SHARED_LINKER_FLAGS_DEBUG}")
message(STATUS "\t==>CMAKE_SHARED_LINKER_FLAGS_RELEASE : ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}")
hipFFT-rocm-5.7.1/deps/000077500000000000000000000000001445203054200145345ustar00rootroot00000000000000hipFFT-rocm-5.7.1/deps/CMakeLists.txt000066400000000000000000000102741445203054200173000ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE.
# #############################################################################

# Helper cmake script to automate building dependencies for hipfft
# This script can be invoked manually by the user with 'cmake -P'

# The ROCm platform requires Ubuntu 16.04 or Fedora 24, which has cmake 3.5
cmake_minimum_required( VERSION 3.5 )

# Look for helper modules next to this file and in the project's shared cmake/ directory
list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../cmake )

# Consider removing this in the future
# It can be annoying for visual studio developers to build a project that tries to install into 'program files'
if( WIN32 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT )
  set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" FORCE )
endif( )

# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE )
  set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." )
endif( )

# The superbuild does not build anything itself; all compiling is done in external projects
project( hipfft-dependencies NONE )

option( BUILD_BOOST "Download and build boost library" ON )
option( BUILD_GTEST "Download and build googletest library" ON )

# This module scrapes the CMakeCache.txt file and attempts to get all the cli options the user specified to cmake invocation
include( get-cli-arguments )

# The following is a series of super-build projects; this cmake project will download and build
#
# Each *_custom_target below is a "change directory, then run the install step" command. The two
# halves are joined with $<SEMICOLON> so a literal ';' reaches the generated command line instead
# of being consumed as a CMake list separator.
if( BUILD_GTEST )
  include( external-gtest )

  list( APPEND hipfft_dependencies googletest )
  set( gtest_custom_target COMMAND cd ${GTEST_BINARY_ROOT}$<SEMICOLON> ${CMAKE_COMMAND} --build . --target install )
endif( )

if( BUILD_BOOST )
  include( external-boost )

  list( APPEND hipfft_dependencies boost )
  set( boost_custom_target COMMAND cd ${BOOST_BINARY_ROOT}$<SEMICOLON> ${Boost.Command} install )
endif( )

# POLICY CMP0037 - "Target names should not be reserved and should match a validity pattern"
# Familiar target names like 'install' should be OK at the super-build level
if( POLICY CMP0037 )
  cmake_policy( SET CMP0037 OLD )
endif( )

# 'install' runs the per-dependency install commands after all external projects have been built
add_custom_target( install
  ${boost_custom_target}
  ${gtest_custom_target}
  DEPENDS ${hipfft_dependencies}
)
hipFFT-rocm-5.7.1/deps/external-boost.cmake000066400000000000000000000171171445203054200205130ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE.
# #############################################################################

# Superbuild module: downloads Boost as an ExternalProject and builds it with b2.
# The b2 command line is accumulated in the list variable Boost.Command; the
# including script can append a further b2 target (e.g. 'install') to it.

message( STATUS "Configuring boost external dependency" )
include( ExternalProject )

set( PREFIX_BOOST ${CMAKE_INSTALL_PREFIX} CACHE PATH "Location where boost should install, defaults to /usr/local" )

# We need to detect the compiler the user is attempting to invoke with CMake,
# we do our best to translate cmake parameters into bjam parameters
enable_language( CXX )
# presumably defines BUILD_64, which is tested further down - confirm against build-bitness.cmake
include( build-bitness )

# TODO: Options should be added to allow downloading Boost straight from github
# This file is used to add Boost as a library dependency to another project
# This sets up boost to download from sourceforge, and builds it as a cmake
# ExternalProject

# Change this one line to upgrade to newer versions of boost
# NOTE: the SHA256 values assigned to ext.HASH below are fixed literals, so they have to be
# updated together with the version.
set( ext.Boost_VERSION "1.64.0" CACHE STRING "Boost version to download/use" )
mark_as_advanced( ext.Boost_VERSION )
# Dotted version rewritten with underscores (1.64.0 -> 1_64_0) for use in the download file name
string( REPLACE "." "_" ext.Boost_Version_Underscore ${ext.Boost_VERSION} )
message( STATUS "ext.Boost_VERSION: " ${ext.Boost_VERSION} )

# Archive extension to download
if( WIN32 )
  # For newer cmake versions, 7z archives are much smaller to download
  if( CMAKE_VERSION VERSION_LESS "3.1.0" )
    set( Boost_Ext "zip" )
  else( )
    set( Boost_Ext "7z" )
  endif( )
else( )
  set( Boost_Ext "tar.bz2" )
endif( )

# Base b2 invocation; every later list( APPEND Boost.Command ... ) adds arguments to it
if( WIN32 )
  set( Boost.Command b2 --prefix=${PREFIX_BOOST} )
else( )
  set( Boost.Command ./b2 --prefix=${PREFIX_BOOST} )
endif( )

# NOTE(review): each space-separated token is appended as its own list element, so the flag that
# follows 'cxxflags=...' reaches b2 as a separate argument rather than as part of cxxflags -
# confirm this is intended.
if( CMAKE_COMPILER_IS_GNUCXX )
  list( APPEND Boost.Command cxxflags=-fPIC -std=c++11 )
elseif( XCODE_VERSION OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang") )
  list( APPEND Boost.Command cxxflags=-std=c++11 -stdlib=libc++ linkflags=-stdlib=libc++ )
endif( )

# Number of parallel b2 jobs
include( ProcessorCount )
ProcessorCount( Cores )
if( NOT Cores EQUAL 0 )
  # Travis can fail to build Boost sporadically; uses 32 cores, reduce stress on VM
  if( DEFINED ENV{TRAVIS} )
    if( Cores GREATER 8 )
      set( Cores 8 )
    endif( )
  endif( )

  # Add build thread in addition to the number of cores that we have
  math( EXPR Cores "${Cores} + 1 " )
else( )
  # If we could not detect # of cores, assume 1 core and add an additional build thread
  set( Cores "2" )
endif( )

message( STATUS "ExternalBoost using ( " ${Cores} " ) cores to build with" )
message( STATUS "ExternalBoost building [ program_options, serialization, filesystem, system, regex ] components" )
list( APPEND Boost.Command -j ${Cores} --with-program_options --with-serialization --with-filesystem --with-system --with-regex )

if( BUILD_64 )
  list( APPEND Boost.Command address-model=64 )
else( )
  list( APPEND Boost.Command address-model=32 )
endif( )

# Map the detected compiler onto a b2 toolset; no toolset argument is added if nothing matches
if( MSVC10 )
  list( APPEND Boost.Command toolset=msvc-10.0 )
elseif( MSVC11 )
  list( APPEND Boost.Command toolset=msvc-11.0 )
elseif( MSVC12 )
  list( APPEND Boost.Command toolset=msvc-12.0 )
elseif( MSVC14 )
  list( APPEND Boost.Command toolset=msvc-14.0 )
elseif( XCODE_VERSION OR ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" ) )
  list( APPEND Boost.Command toolset=clang )
elseif( CMAKE_COMPILER_IS_GNUCXX )
  list( APPEND Boost.Command toolset=gcc )
endif( )

if( WIN32 AND (ext.Boost_VERSION VERSION_LESS "1.60.0") )
  list( APPEND Boost.Command define=BOOST_LOG_USE_WINNT6_API )
endif( )

# Default the link method from BUILD_SHARED_LIBS unless ext.Boost_LINK is already defined
if( NOT DEFINED ext.Boost_LINK )
  if( ${BUILD_SHARED_LIBS} MATCHES "ON" )
    set( ext.Boost_LINK "shared" CACHE STRING "Which boost link method? static | shared | static,shared" )
  else( )
    set( ext.Boost_LINK "static" CACHE STRING "Which boost link method? static | shared | static,shared" )
  endif( )
endif()
mark_as_advanced( ext.Boost_LINK )

if( WIN32 )
  # Versioned is the default on windows
  set( ext.Boost_LAYOUT "versioned" CACHE STRING "Which boost layout method? versioned | tagged | system" )

  # For windows, default to build both variants to support the VS IDE
  set( ext.Boost_VARIANT "debug,release" CACHE STRING "Which boost variant? debug | release | debug,release" )
else( )
  # Tagged builds provide unique enough names to be able to build both variants
  set( ext.Boost_LAYOUT "tagged" CACHE STRING "Which boost layout method? versioned | tagged | system" )

  # For Linux, typically a build tree only needs one variant
  if( ${CMAKE_BUILD_TYPE} MATCHES "Debug")
    set( ext.Boost_VARIANT "debug" CACHE STRING "Which boost variant? debug | release | debug,release" )
  else( )
    set( ext.Boost_VARIANT "release" CACHE STRING "Which boost variant? debug | release | debug,release" )
  endif( )
endif( )
mark_as_advanced( ext.Boost_LAYOUT )
mark_as_advanced( ext.Boost_VARIANT )

list( APPEND Boost.Command --layout=${ext.Boost_LAYOUT} link=${ext.Boost_LINK} variant=${ext.Boost_VARIANT} )

message( STATUS "Boost.Command: ${Boost.Command}" )

# If the user has a cached local copy stored somewhere, they can define the full path to the package in a BOOST_URL environment variable
if( DEFINED ENV{BOOST_URL} )
  set( ext.Boost_URL "$ENV{BOOST_URL}" CACHE STRING "URL to download Boost from" )
else( )
  set( ext.Boost_URL "http://sourceforge.net/projects/boost/files/boost/${ext.Boost_VERSION}/boost_${ext.Boost_Version_Underscore}.${Boost_Ext}/download" CACHE STRING "URL to download Boost from" )
endif( )
mark_as_advanced( ext.Boost_URL )

# Bootstrap command and the SHA256 expected for the archive type selected above
set( Boost.Bootstrap "" )
set( ext.HASH "" )
if( WIN32 )
  set( Boost.Bootstrap "bootstrap.bat" )

  if( CMAKE_VERSION VERSION_LESS "3.1.0" )
    # .zip file
    set( ext.HASH "b99973c805f38b549dbeaf88701c0abeff8b0e8eaa4066df47cac10a32097523" )
  else( )
    # .7z file
    set( ext.HASH "49c6abfeb5b480f6a86119c0d57235966b4690ee6ff9e6401ee868244808d155" )
  endif( )
else( )
  set( Boost.Bootstrap "./bootstrap.sh" )

  # .tar.bz2
  set( ext.HASH "7bcc5caace97baa948931d712ea5f37038dbb1c5d89b43ad4def4ed7cb683332" )

  if( XCODE_VERSION OR ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" ) )
    list( APPEND Boost.Bootstrap --with-toolset=clang )
  endif( )
endif( )

# Below is a fancy CMake command to download, build and install Boost on the users computer
# The bootstrap script runs as the update step, b2 'stage' is the build step, and the install step
# is left empty here.
ExternalProject_Add(
  boost
  PREFIX ${CMAKE_BINARY_DIR}/boost
  URL ${ext.Boost_URL}
  URL_HASH SHA256=${ext.HASH}
  UPDATE_COMMAND ${Boost.Bootstrap}
  LOG_UPDATE 1
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ${Boost.Command} stage
  BUILD_IN_SOURCE 1
  LOG_BUILD 1
  INSTALL_COMMAND ""
)

set_property( TARGET boost PROPERTY FOLDER "extern" )
ExternalProject_Get_Property( boost install_dir )
ExternalProject_Get_Property( boost binary_dir )

# For use by the script that includes this module
set( BOOST_INSTALL_ROOT ${install_dir} )
set( BOOST_BINARY_ROOT ${binary_dir} )
hipFFT-rocm-5.7.1/deps/external-gtest.cmake000066400000000000000000000115061445203054200205070ustar00rootroot00000000000000# ############################################################################# # Copyright (C) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE.
# #############################################################################

# Superbuild module: clones googletest as an ExternalProject, configures it with the user's own
# command-line -D options, and builds it. <BINARY_DIR> and <SOURCE_DIR> below are ExternalProject
# placeholder tokens that are substituted with the project's directories when the commands run.

message( STATUS "Configuring gtest external dependency" )
include( ExternalProject )

set( PREFIX_GTEST ${CMAKE_INSTALL_PREFIX} CACHE PATH "Location where gtest should install, defaults to /usr/local" )
set( gtest_cmake_args -DCMAKE_INSTALL_PREFIX=${PREFIX_GTEST} )
# Forward the -D options the user gave to this cmake invocation on to the gtest configure step
append_cmake_cli_arguments( gtest_cmake_args gtest_cmake_args )

set( gtest_git_repository "https://github.com/google/googletest.git" CACHE STRING "URL to download gtest from" )
set( gtest_git_tag "release-1.8.0" CACHE STRING "Git tag of gtest to check out" )

if( MSVC )
  list( APPEND gtest_cmake_args -Dgtest_force_shared_crt=ON -DCMAKE_DEBUG_POSTFIX=d )
endif( )

if( CMAKE_CONFIGURATION_TYPES )
  # Multi-config generators: build both configurations of the external project's build tree
  set( gtest_make
    COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --config Release
    COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --config Debug
  )
else( )
  # Add build thread in addition to the number of cores that we have
  include( ProcessorCount )
  ProcessorCount( Cores )

  # If we are not using an IDE, assume nmake with visual studio
  if( MSVC )
    set( gtest_make "nmake" )
  else( )
    set( gtest_make "make" )

    # The -j parameter does not work with nmake
    if( NOT Cores EQUAL 0 )
      math( EXPR Cores "${Cores} + 1 " )
      list( APPEND gtest_make -j ${Cores} )
    else( )
      # If we could not detect # of cores, assume 1 core and add an additional build thread
      list( APPEND gtest_make -j 2 )
    endif( )
  endif( )

  message( STATUS "ExternalGmock using ( " ${Cores} " ) cores to build with" )
endif( )

# Master branch has a new structure that combines googletest with googlemock
ExternalProject_Add(
  googletest
  PREFIX ${CMAKE_BINARY_DIR}/gtest
  GIT_REPOSITORY ${gtest_git_repository}
  GIT_TAG ${gtest_git_tag}
  CMAKE_ARGS ${gtest_cmake_args}
  BUILD_COMMAND ${gtest_make}
  LOG_BUILD 1
  INSTALL_COMMAND ""
  LOG_INSTALL 1
)

ExternalProject_Get_Property( googletest source_dir )

# For visual studio, the path 'debug' is hardcoded because that is the default VS configuration for a build.
# Doesn't matter if its the gtest or gtestd project above
set( package_dir "${PREFIX_GTEST}" )
if( CMAKE_CONFIGURATION_TYPES )
  # Create a package by bundling libraries and header files
  if( BUILD_64 )
    set( LIB_DIR lib64 )
  else( )
    set( LIB_DIR lib )
  endif( )

  # Per-configuration library output directories live under the external project's build tree
  set( gtest_lib_dir "<BINARY_DIR>/${LIB_DIR}" )
  # NOTE(review): the Debug/Release library copies are listed twice with identical arguments;
  # kept as found - confirm whether a second library directory was intended.
  ExternalProject_Add_Step( googletest createPackage
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${gtest_lib_dir}/Debug ${package_dir}/${LIB_DIR}
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${gtest_lib_dir}/Release ${package_dir}/${LIB_DIR}
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${gtest_lib_dir}/Debug ${package_dir}/${LIB_DIR}
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${gtest_lib_dir}/Release ${package_dir}/${LIB_DIR}
    COMMAND ${CMAKE_COMMAND} -E copy_directory <SOURCE_DIR>/include ${package_dir}/include
    COMMAND ${CMAKE_COMMAND} -E copy_directory <SOURCE_DIR>/gtest/include/gtest ${package_dir}/include/gtest
    DEPENDEES install
  )
endif( )

set_property( TARGET googletest PROPERTY FOLDER "extern" )
ExternalProject_Get_Property( googletest install_dir )
ExternalProject_Get_Property( googletest binary_dir )

# For use by the script that includes this module
set( GTEST_INSTALL_ROOT ${install_dir} )
set( GTEST_BINARY_ROOT ${binary_dir} )
hipFFT-rocm-5.7.1/docs/000077500000000000000000000000001445203054200145315ustar00rootroot00000000000000hipFFT-rocm-5.7.1/docs/.doxygen/000077500000000000000000000000001445203054200162645ustar00rootroot00000000000000hipFFT-rocm-5.7.1/docs/.doxygen/Doxyfile000066400000000000000000003210101445203054200177670ustar00rootroot00000000000000# Doxyfile 1.8.10 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project.
# # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = "hipFFT" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = v1.0.12 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "prototype interfaces compatible with ROCm platform and HIP" # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation.
The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = docBin # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the.
ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. 
STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. 
SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. 
OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. In the latter case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.)
but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = YES # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. 
By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. 
TYPEDEF_HIDES_STRUCT = YES # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- SHOW_NAMESPACES = NO # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO.
EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. 
HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. 
SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. 
If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. 
ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. 
FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. 
WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. 
You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = hipfft.h # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, # *.vhdl, *.ucf, *.qsf, *.as and *.js. FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.idl \ *.ddl \ *.odl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.cs \ *.d \ *.php \ *.php4 \ *.php5 \ *.phtml \ *.inc \ *.m \ *.markdown \ *.md \ *.mm \ *.dox \ *.py \ *.f90 \ *.f \ *.for \ *.tcl \ *.vhd \ *.vhdl \ *.ucf \ *.qsf \ *.as \ *.js # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. 
RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. 
EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern.
A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = ../README.md #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. 
REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. 
VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # compiled with the --with-libclang option. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. 
IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output. # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML, the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. 
If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. 
Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. 
Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. 
GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. 
Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). 
# This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. 
Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 1 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. 
TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes take effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side Javascript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. 
# Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. 
Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use + S # (what the is depends on the OS and browser, but it is typically # , /